cache-tree.con commit index: make the index file format extensible. (bad68ec)
   1#include "cache.h"
   2#include "tree.h"
   3#include "cache-tree.h"
   4
   5#define DEBUG 0
   6
   7struct cache_tree *cache_tree(void)
   8{
   9        struct cache_tree *it = xcalloc(1, sizeof(struct cache_tree));
  10        it->entry_count = -1;
  11        return it;
  12}
  13
  14void cache_tree_free(struct cache_tree **it_p)
  15{
  16        int i;
  17        struct cache_tree *it = *it_p;
  18
  19        if (!it)
  20                return;
  21        for (i = 0; i < it->subtree_nr; i++)
  22                cache_tree_free(&it->down[i]->cache_tree);
  23        free(it->down);
  24        free(it);
  25        *it_p = NULL;
  26}
  27
  28static struct cache_tree_sub *find_subtree(struct cache_tree *it,
  29                                           const char *path,
  30                                           int pathlen,
  31                                           int create)
  32{
  33        int i;
  34        struct cache_tree_sub *down;
  35        for (i = 0; i < it->subtree_nr; i++) {
  36                down = it->down[i];
  37                if (down->namelen == pathlen &&
  38                    !memcmp(down->name, path, pathlen))
  39                        return down;
  40        }
  41        if (!create)
  42                return NULL;
  43        if (it->subtree_alloc <= it->subtree_nr) {
  44                it->subtree_alloc = alloc_nr(it->subtree_alloc);
  45                it->down = xrealloc(it->down, it->subtree_alloc *
  46                                    sizeof(*it->down));
  47        }
  48        down = xmalloc(sizeof(*down) + pathlen + 1);
  49        down->cache_tree = NULL; /* cache_tree(); */
  50        down->namelen = pathlen;
  51        memcpy(down->name, path, pathlen);
  52        down->name[pathlen] = 0; /* not strictly needed */
  53        it->down[it->subtree_nr++] = down;
  54        return down;
  55}
  56
  57void cache_tree_invalidate_path(struct cache_tree *it, const char *path)
  58{
  59        /* a/b/c
  60         * ==> invalidate self
  61         * ==> find "a", have it invalidate "b/c"
  62         * a
  63         * ==> invalidate self
  64         * ==> if "a" exists as a subtree, remove it.
  65         */
  66        const char *slash;
  67        int namelen;
  68        struct cache_tree_sub *down;
  69
  70        if (!it)
  71                return;
  72        slash = strchr(path, '/');
  73        it->entry_count = -1;
  74        if (!slash) {
  75                int i;
  76                namelen = strlen(path);
  77                for (i = 0; i < it->subtree_nr; i++) {
  78                        if (it->down[i]->namelen == namelen &&
  79                            !memcmp(it->down[i]->name, path, namelen))
  80                                break;
  81                }
  82                if (i < it->subtree_nr) {
  83                        cache_tree_free(&it->down[i]->cache_tree);
  84                        free(it->down[i]);
  85                        /* 0 1 2 3 4 5
  86                         *       ^     ^subtree_nr = 6
  87                         *       i
  88                         * move 4 and 5 up one place (2 entries)
  89                         * 2 = 6 - 3 - 1 = subtree_nr - i - 1
  90                         */
  91                        memmove(it->down+i, it->down+i+1,
  92                                sizeof(struct cache_tree_sub *) *
  93                                (it->subtree_nr - i - 1));
  94                        it->subtree_nr--;
  95                }
  96                return;
  97        }
  98        namelen = slash - path;
  99        down = find_subtree(it, path, namelen, 0);
 100        if (down)
 101                cache_tree_invalidate_path(down->cache_tree, slash + 1);
 102}
 103
 104static int verify_cache(struct cache_entry **cache,
 105                        int entries)
 106{
 107        int i, funny;
 108
 109        /* Verify that the tree is merged */
 110        funny = 0;
 111        for (i = 0; i < entries; i++) {
 112                struct cache_entry *ce = cache[i];
 113                if (ce_stage(ce)) {
 114                        if (10 < ++funny) {
 115                                fprintf(stderr, "...\n");
 116                                break;
 117                        }
 118                        fprintf(stderr, "%s: unmerged (%s)\n",
 119                                ce->name, sha1_to_hex(ce->sha1));
 120                }
 121        }
 122        if (funny)
 123                return -1;
 124
 125        /* Also verify that the cache does not have path and path/file
 126         * at the same time.  At this point we know the cache has only
 127         * stage 0 entries.
 128         */
 129        funny = 0;
 130        for (i = 0; i < entries - 1; i++) {
 131                /* path/file always comes after path because of the way
 132                 * the cache is sorted.  Also path can appear only once,
 133                 * which means conflicting one would immediately follow.
 134                 */
 135                const char *this_name = cache[i]->name;
 136                const char *next_name = cache[i+1]->name;
 137                int this_len = strlen(this_name);
 138                if (this_len < strlen(next_name) &&
 139                    strncmp(this_name, next_name, this_len) == 0 &&
 140                    next_name[this_len] == '/') {
 141                        if (10 < ++funny) {
 142                                fprintf(stderr, "...\n");
 143                                break;
 144                        }
 145                        fprintf(stderr, "You have both %s and %s\n",
 146                                this_name, next_name);
 147                }
 148        }
 149        if (funny)
 150                return -1;
 151        return 0;
 152}
 153
 154static void discard_unused_subtrees(struct cache_tree *it)
 155{
 156        struct cache_tree_sub **down = it->down;
 157        int nr = it->subtree_nr;
 158        int dst, src;
 159        for (dst = src = 0; src < nr; src++) {
 160                struct cache_tree_sub *s = down[src];
 161                if (s->used)
 162                        down[dst++] = s;
 163                else {
 164                        cache_tree_free(&s->cache_tree);
 165                        free(s);
 166                        it->subtree_nr--;
 167                }
 168        }
 169}
 170
 171int cache_tree_fully_valid(struct cache_tree *it)
 172{
 173        int i;
 174        if (!it)
 175                return 0;
 176        if (it->entry_count < 0 || !has_sha1_file(it->sha1))
 177                return 0;
 178        for (i = 0; i < it->subtree_nr; i++) {
 179                if (!cache_tree_fully_valid(it->down[i]->cache_tree))
 180                        return 0;
 181        }
 182        return 1;
 183}
 184
 185static int update_one(struct cache_tree *it,
 186                      struct cache_entry **cache,
 187                      int entries,
 188                      const char *base,
 189                      int baselen,
 190                      int missing_ok)
 191{
 192        unsigned long size, offset;
 193        char *buffer;
 194        int i;
 195
 196        if (0 <= it->entry_count && has_sha1_file(it->sha1))
 197                return it->entry_count;
 198
 199        /*
 200         * We first scan for subtrees and update them; we start by
 201         * marking existing subtrees -- the ones that are unmarked
 202         * should not be in the result.
 203         */
 204        for (i = 0; i < it->subtree_nr; i++)
 205                it->down[i]->used = 0;
 206
 207        /*
 208         * Find the subtrees and update them.
 209         */
 210        for (i = 0; i < entries; i++) {
 211                struct cache_entry *ce = cache[i];
 212                struct cache_tree_sub *sub;
 213                const char *path, *slash;
 214                int pathlen, sublen, subcnt;
 215
 216                path = ce->name;
 217                pathlen = ce_namelen(ce);
 218                if (pathlen <= baselen || memcmp(base, path, baselen))
 219                        break; /* at the end of this level */
 220
 221                slash = strchr(path + baselen, '/');
 222                if (!slash)
 223                        continue;
 224                /*
 225                 * a/bbb/c (base = a/, slash = /c)
 226                 * ==>
 227                 * path+baselen = bbb/c, sublen = 3
 228                 */
 229                sublen = slash - (path + baselen);
 230                sub = find_subtree(it, path + baselen, sublen, 1);
 231                if (!sub->cache_tree)
 232                        sub->cache_tree = cache_tree();
 233                subcnt = update_one(sub->cache_tree,
 234                                    cache + i, entries - i,
 235                                    path,
 236                                    baselen + sublen + 1,
 237                                    missing_ok);
 238                i += subcnt - 1;
 239                sub->used = 1;
 240        }
 241
 242        discard_unused_subtrees(it);
 243
 244        /*
 245         * Then write out the tree object for this level.
 246         */
 247        size = 8192;
 248        buffer = xmalloc(size);
 249        offset = 0;
 250
 251        for (i = 0; i < entries; i++) {
 252                struct cache_entry *ce = cache[i];
 253                struct cache_tree_sub *sub;
 254                const char *path, *slash;
 255                int pathlen, entlen;
 256                const unsigned char *sha1;
 257                unsigned mode;
 258
 259                path = ce->name;
 260                pathlen = ce_namelen(ce);
 261                if (pathlen <= baselen || memcmp(base, path, baselen))
 262                        break; /* at the end of this level */
 263
 264                slash = strchr(path + baselen, '/');
 265                if (slash) {
 266                        entlen = slash - (path + baselen);
 267                        sub = find_subtree(it, path + baselen, entlen, 0);
 268                        if (!sub)
 269                                die("cache-tree.c: '%.*s' in '%s' not found",
 270                                    entlen, path + baselen, path);
 271                        i += sub->cache_tree->entry_count - 1;
 272                        sha1 = sub->cache_tree->sha1;
 273                        mode = S_IFDIR;
 274                }
 275                else {
 276                        sha1 = ce->sha1;
 277                        mode = ntohl(ce->ce_mode);
 278                        entlen = pathlen - baselen;
 279                }
 280                if (!missing_ok && !has_sha1_file(sha1))
 281                        return error("invalid object %s", sha1_to_hex(sha1));
 282
 283                if (!ce->ce_mode)
 284                        continue; /* entry being removed */
 285
 286                if (size < offset + entlen + 100) {
 287                        size = alloc_nr(offset + entlen + 100);
 288                        buffer = xrealloc(buffer, size);
 289                }
 290                offset += sprintf(buffer + offset,
 291                                  "%o %.*s", mode, entlen, path + baselen);
 292                buffer[offset++] = 0;
 293                memcpy(buffer + offset, sha1, 20);
 294                offset += 20;
 295
 296#if DEBUG
 297                fprintf(stderr, "cache-tree %o %.*s\n",
 298                        mode, entlen, path + baselen);
 299#endif
 300        }
 301
 302        write_sha1_file(buffer, offset, tree_type, it->sha1);
 303        free(buffer);
 304        it->entry_count = i;
 305#if DEBUG
 306        fprintf(stderr, "cache-tree (%d ent, %d subtree) %s\n",
 307                it->entry_count, it->subtree_nr,
 308                sha1_to_hex(it->sha1));
 309#endif
 310        return i;
 311}
 312
 313int cache_tree_update(struct cache_tree *it,
 314                      struct cache_entry **cache,
 315                      int entries,
 316                      int missing_ok)
 317{
 318        int i;
 319        i = verify_cache(cache, entries);
 320        if (i)
 321                return i;
 322        i = update_one(it, cache, entries, "", 0, missing_ok);
 323        if (i < 0)
 324                return i;
 325        return 0;
 326}
 327
 328static void *write_one(struct cache_tree *it,
 329                       char *path,
 330                       int pathlen,
 331                       char *buffer,
 332                       unsigned long *size,
 333                       unsigned long *offset)
 334{
 335        int i;
 336
 337        /* One "cache-tree" entry consists of the following:
 338         * path (NUL terminated)
 339         * entry_count, subtree_nr ("%d %d\n")
 340         * tree-sha1 (missing if invalid)
 341         * subtree_nr "cache-tree" entries for subtrees.
 342         */
 343        if (*size < *offset + pathlen + 100) {
 344                *size = alloc_nr(*offset + pathlen + 100);
 345                buffer = xrealloc(buffer, *size);
 346        }
 347        *offset += sprintf(buffer + *offset, "%.*s%c%d %d\n",
 348                           pathlen, path, 0,
 349                           it->entry_count, it->subtree_nr);
 350
 351#if DEBUG
 352        if (0 <= it->entry_count)
 353                fprintf(stderr, "cache-tree <%.*s> (%d ent, %d subtree) %s\n",
 354                        pathlen, path, it->entry_count, it->subtree_nr,
 355                        sha1_to_hex(it->sha1));
 356        else
 357                fprintf(stderr, "cache-tree <%.*s> (%d subtree) invalid\n",
 358                        pathlen, path, it->subtree_nr);
 359#endif
 360
 361        if (0 <= it->entry_count) {
 362                memcpy(buffer + *offset, it->sha1, 20);
 363                *offset += 20;
 364        }
 365        for (i = 0; i < it->subtree_nr; i++) {
 366                struct cache_tree_sub *down = it->down[i];
 367                buffer = write_one(down->cache_tree, down->name, down->namelen,
 368                                   buffer, size, offset);
 369        }
 370        return buffer;
 371}
 372
 373void *cache_tree_write(struct cache_tree *root, unsigned long *size_p)
 374{
 375        char path[PATH_MAX];
 376        unsigned long size = 8192;
 377        char *buffer = xmalloc(size);
 378
 379        *size_p = 0;
 380        path[0] = 0;
 381        return write_one(root, path, 0, buffer, &size, size_p);
 382}
 383
 384static struct cache_tree *read_one(const char **buffer, unsigned long *size_p)
 385{
 386        const char *buf = *buffer;
 387        unsigned long size = *size_p;
 388        struct cache_tree *it;
 389        int i, subtree_nr;
 390
 391        it = NULL;
 392        /* skip name, but make sure name exists */
 393        while (size && *buf) {
 394                size--;
 395                buf++;
 396        }
 397        if (!size)
 398                goto free_return;
 399        buf++; size--;
 400        it = cache_tree();
 401        if (sscanf(buf, "%d %d\n", &it->entry_count, &subtree_nr) != 2)
 402                goto free_return;
 403        while (size && *buf && *buf != '\n') {
 404                size--;
 405                buf++;
 406        }
 407        if (!size)
 408                goto free_return;
 409        buf++; size--;
 410        if (0 <= it->entry_count) {
 411                if (size < 20)
 412                        goto free_return;
 413                memcpy(it->sha1, buf, 20);
 414                buf += 20;
 415                size -= 20;
 416        }
 417
 418#if DEBUG
 419        if (0 <= it->entry_count)
 420                fprintf(stderr, "cache-tree <%s> (%d ent, %d subtree) %s\n",
 421                        *buffer, it->entry_count, subtree_nr,
 422                        sha1_to_hex(it->sha1));
 423        else
 424                fprintf(stderr, "cache-tree <%s> (%d subtrees) invalid\n",
 425                        *buffer, subtree_nr);
 426#endif
 427
 428        /*
 429         * Just a heuristic -- we do not add directories that often but
 430         * we do not want to have to extend it immediately when we do,
 431         * hence +2.
 432         */
 433        it->subtree_alloc = subtree_nr + 2;
 434        it->down = xcalloc(it->subtree_alloc, sizeof(struct cache_tree_sub *));
 435        for (i = 0; i < subtree_nr; i++) {
 436                /* read each subtree */
 437                struct cache_tree *sub;
 438                const char *name = buf;
 439                int namelen;
 440                sub = read_one(&buf, &size);
 441                if (!sub)
 442                        goto free_return;
 443                namelen = strlen(name);
 444                it->down[i] = find_subtree(it, name, namelen, 1);
 445                it->down[i]->cache_tree = sub;
 446        }
 447        if (subtree_nr != it->subtree_nr)
 448                die("cache-tree: internal error");
 449        *buffer = buf;
 450        *size_p = size;
 451        return it;
 452
 453 free_return:
 454        cache_tree_free(&it);
 455        return NULL;
 456}
 457
 458struct cache_tree *cache_tree_read(const char *buffer, unsigned long size)
 459{
 460        if (buffer[0])
 461                return NULL; /* not the whole tree */
 462        return read_one(&buffer, &size);
 463}