read-tree.c on commit [PATCH] Improve git-rev-list memory usage further (b0d8923)
   1/*
   2 * GIT - The information manager from hell
   3 *
   4 * Copyright (C) Linus Torvalds, 2005
   5 */
   6#define DBRT_DEBUG 1
   7
   8#include "cache.h"
   9
  10#include "object.h"
  11#include "tree.h"
  12
  13static int merge = 0;
  14static int update = 0;
  15static int index_only = 0;
  16
  17static int head_idx = -1;
  18static int merge_size = 0;
  19
  20static struct object_list *trees = NULL;
  21
  22static struct cache_entry df_conflict_entry = { 
  23};
  24
  25static struct tree_entry_list df_conflict_list = {
  26        .name = NULL,
  27        .next = &df_conflict_list
  28};
  29
  30typedef int (*merge_fn_t)(struct cache_entry **src);
  31
  /*
   * Compare two entry names the way they sort inside a tree: byte-wise, with
   * a directory name treated as if it were followed by '/'.
   *
   * name1, name2: NUL-terminated names (not modified).
   * dir1, dir2:   non-zero when the corresponding name is a directory.
   *
   * Returns a negative value, zero, or a positive value when name1 sorts
   * before, equal to, or after name2.  Only the sign is meaningful.
   */
  static int entcmp(const char *name1, int dir1, const char *name2, int dir2)
  {
          size_t len1 = strlen(name1);
          size_t len2 = strlen(name2);
          size_t len = len1 < len2 ? len1 : len2;
          int ret = memcmp(name1, name2, len);
          unsigned char c1, c2;

          if (ret)
                  return ret;

          /* The common prefix matches; look at the byte that follows it. */
          c1 = name1[len];
          c2 = name2[len];
          if (!c1 && dir1)
                  c1 = '/';
          if (!c2 && dir2)
                  c2 = '/';
          if (c1 != c2)
                  return c1 < c2 ? -1 : 1;

          /*
           * Both sides continue with the same non-NUL byte (e.g. a directory
           * "foo" against a path "foo/bar"): the shorter name sorts first.
           */
          if (c1 && len1 != len2)
                  return len1 < len2 ? -1 : 1;
          return 0;
  }
  52
  53static int unpack_trees_rec(struct tree_entry_list **posns, int len,
  54                            const char *base, merge_fn_t fn, int *indpos)
  55{
  56        int baselen = strlen(base);
  57        int src_size = len + 1;
  58        do {
  59                int i;
  60                char *first;
  61                int firstdir = 0;
  62                int pathlen;
  63                unsigned ce_size;
  64                struct tree_entry_list **subposns;
  65                struct cache_entry **src;
  66                int any_files = 0;
  67                int any_dirs = 0;
  68                char *cache_name;
  69                int ce_stage;
  70
  71                /* Find the first name in the input. */
  72
  73                first = NULL;
  74                cache_name = NULL;
  75
  76                /* Check the cache */
  77                if (merge && *indpos < active_nr) {
  78                        /* This is a bit tricky: */
  79                        /* If the index has a subdirectory (with
  80                         * contents) as the first name, it'll get a
  81                         * filename like "foo/bar". But that's after
  82                         * "foo", so the entry in trees will get
  83                         * handled first, at which point we'll go into
  84                         * "foo", and deal with "bar" from the index,
  85                         * because the base will be "foo/". The only
  86                         * way we can actually have "foo/bar" first of
  87                         * all the things is if the trees don't
  88                         * contain "foo" at all, in which case we'll
  89                         * handle "foo/bar" without going into the
  90                         * directory, but that's fine (and will return
  91                         * an error anyway, with the added unknown
  92                         * file case.
  93                         */
  94
  95                        cache_name = active_cache[*indpos]->name;
  96                        if (strlen(cache_name) > baselen &&
  97                            !memcmp(cache_name, base, baselen)) {
  98                                cache_name += baselen;
  99                                first = cache_name;
 100                        } else {
 101                                cache_name = NULL;
 102                        }
 103                }
 104
 105#if DBRT_DEBUG > 1
 106                if (first)
 107                        printf("index %s\n", first);
 108#endif
 109                for (i = 0; i < len; i++) {
 110                        if (!posns[i] || posns[i] == &df_conflict_list)
 111                                continue;
 112#if DBRT_DEBUG > 1
 113                        printf("%d %s\n", i + 1, posns[i]->name);
 114#endif
 115                        if (!first || entcmp(first, firstdir,
 116                                             posns[i]->name, 
 117                                             posns[i]->directory) > 0) {
 118                                first = posns[i]->name;
 119                                firstdir = posns[i]->directory;
 120                        }
 121                }
 122                /* No name means we're done */
 123                if (!first)
 124                        return 0;
 125
 126                pathlen = strlen(first);
 127                ce_size = cache_entry_size(baselen + pathlen);
 128
 129                src = xmalloc(sizeof(struct cache_entry *) * src_size);
 130                memset(src, 0, sizeof(struct cache_entry *) * src_size);
 131
 132                subposns = xmalloc(sizeof(struct tree_list_entry *) * len);
 133                memset(subposns, 0, sizeof(struct tree_list_entry *) * len);
 134
 135                if (cache_name && !strcmp(cache_name, first)) {
 136                        any_files = 1;
 137                        src[0] = active_cache[*indpos];
 138                        remove_cache_entry_at(*indpos);
 139                }
 140
 141                for (i = 0; i < len; i++) {
 142                        struct cache_entry *ce;
 143
 144                        if (!posns[i] ||
 145                            (posns[i] != &df_conflict_list &&
 146                             strcmp(first, posns[i]->name))) {
 147                                continue;
 148                        }
 149
 150                        if (posns[i] == &df_conflict_list) {
 151                                src[i + merge] = &df_conflict_entry;
 152                                continue;
 153                        }
 154
 155                        if (posns[i]->directory) {
 156                                any_dirs = 1;
 157                                parse_tree(posns[i]->item.tree);
 158                                subposns[i] = posns[i]->item.tree->entries;
 159                                posns[i] = posns[i]->next;
 160                                src[i + merge] = &df_conflict_entry;
 161                                continue;
 162                        }
 163
 164                        if (!merge)
 165                                ce_stage = 0;
 166                        else if (i + 1 < head_idx)
 167                                ce_stage = 1;
 168                        else if (i + 1 > head_idx)
 169                                ce_stage = 3;
 170                        else
 171                                ce_stage = 2;
 172
 173                        ce = xmalloc(ce_size);
 174                        memset(ce, 0, ce_size);
 175                        ce->ce_mode = create_ce_mode(posns[i]->mode);
 176                        ce->ce_flags = create_ce_flags(baselen + pathlen,
 177                                                       ce_stage);
 178                        memcpy(ce->name, base, baselen);
 179                        memcpy(ce->name + baselen, first, pathlen + 1);
 180
 181                        any_files = 1;
 182
 183                        memcpy(ce->sha1, posns[i]->item.any->sha1, 20);
 184                        src[i + merge] = ce;
 185                        subposns[i] = &df_conflict_list;
 186                        posns[i] = posns[i]->next;
 187                }
 188                if (any_files) {
 189                        if (merge) {
 190                                int ret;
 191
 192#if DBRT_DEBUG > 1
 193                                printf("%s:\n", first);
 194                                for (i = 0; i < src_size; i++) {
 195                                        printf(" %d ", i);
 196                                        if (src[i])
 197                                                printf("%s\n", sha1_to_hex(src[i]->sha1));
 198                                        else
 199                                                printf("\n");
 200                                }
 201#endif
 202                                ret = fn(src);
 203                                
 204#if DBRT_DEBUG > 1
 205                                printf("Added %d entries\n", ret);
 206#endif
 207                                *indpos += ret;
 208                        } else {
 209                                for (i = 0; i < src_size; i++) {
 210                                        if (src[i]) {
 211                                                add_cache_entry(src[i], ADD_CACHE_OK_TO_ADD|ADD_CACHE_SKIP_DFCHECK);
 212                                        }
 213                                }
 214                        }
 215                }
 216                if (any_dirs) {
 217                        char *newbase = xmalloc(baselen + 2 + pathlen);
 218                        memcpy(newbase, base, baselen);
 219                        memcpy(newbase + baselen, first, pathlen);
 220                        newbase[baselen + pathlen] = '/';
 221                        newbase[baselen + pathlen + 1] = '\0';
 222                        if (unpack_trees_rec(subposns, len, newbase, fn,
 223                                             indpos))
 224                                return -1;
 225                        free(newbase);
 226                }
 227                free(subposns);
 228                free(src);
 229        } while (1);
 230}
 231
 232static void reject_merge(struct cache_entry *ce)
 233{
 234        die("Entry '%s' would be overwritten by merge. Cannot merge.", 
 235            ce->name);
 236}
 237
 238static void check_updates(struct cache_entry **src, int nr)
 239{
 240        static struct checkout state = {
 241                .base_dir = "",
 242                .force = 1,
 243                .quiet = 1,
 244                .refresh_cache = 1,
 245        };
 246        unsigned short mask = htons(CE_UPDATE);
 247        while (nr--) {
 248                struct cache_entry *ce = *src++;
 249                if (!ce->ce_mode) {
 250                        if (update)
 251                                unlink(ce->name);
 252                        continue;
 253                }
 254                if (ce->ce_flags & mask) {
 255                        ce->ce_flags &= ~mask;
 256                        if (update)
 257                                checkout_entry(ce, &state);
 258                }
 259        }
 260}
 261
 262static int unpack_trees(merge_fn_t fn)
 263{
 264        int indpos = 0;
 265        unsigned len = object_list_length(trees);
 266        struct tree_entry_list **posns = 
 267                xmalloc(len * sizeof(struct tree_entry_list *));
 268        int i;
 269        struct object_list *posn = trees;
 270        merge_size = len;
 271        for (i = 0; i < len; i++) {
 272                posns[i] = ((struct tree *) posn->item)->entries;
 273                posn = posn->next;
 274        }
 275        if (unpack_trees_rec(posns, len, "", fn, &indpos))
 276                return -1;
 277
 278        check_updates(active_cache, active_nr);
 279        return 0;
 280}
 281
 282static int list_tree(unsigned char *sha1)
 283{
 284        struct tree *tree = parse_tree_indirect(sha1);
 285        if (!tree)
 286                return -1;
 287        object_list_append(&tree->object, &trees);
 288        return 0;
 289}
 290
 291static int same(struct cache_entry *a, struct cache_entry *b)
 292{
 293        if (!!a != !!b)
 294                return 0;
 295        if (!a && !b)
 296                return 1;
 297        return a->ce_mode == b->ce_mode && 
 298                !memcmp(a->sha1, b->sha1, 20);
 299}
 300
 301
 302/*
 303 * When a CE gets turned into an unmerged entry, we
 304 * want it to be up-to-date
 305 */
 306static void verify_uptodate(struct cache_entry *ce)
 307{
 308        struct stat st;
 309
 310        if (index_only)
 311                return;
 312
 313        if (!lstat(ce->name, &st)) {
 314                unsigned changed = ce_match_stat(ce, &st);
 315                if (!changed)
 316                        return;
 317                errno = 0;
 318        }
 319        if (errno == ENOENT)
 320                return;
 321        die("Entry '%s' not uptodate. Cannot merge.", ce->name);
 322}
 323
 324static int merged_entry(struct cache_entry *merge, struct cache_entry *old)
 325{
 326        merge->ce_flags |= htons(CE_UPDATE);
 327        if (old) {
 328                /*
 329                 * See if we can re-use the old CE directly?
 330                 * That way we get the uptodate stat info.
 331                 *
 332                 * This also removes the UPDATE flag on
 333                 * a match.
 334                 */
 335                if (same(old, merge)) {
 336                        *merge = *old;
 337                } else {
 338                        verify_uptodate(old);
 339                }
 340        }
 341        merge->ce_flags &= ~htons(CE_STAGEMASK);
 342        add_cache_entry(merge, ADD_CACHE_OK_TO_ADD);
 343        return 1;
 344}
 345
 346static int deleted_entry(struct cache_entry *ce, struct cache_entry *old)
 347{
 348        if (old)
 349                verify_uptodate(old);
 350        ce->ce_mode = 0;
 351        add_cache_entry(ce, ADD_CACHE_OK_TO_ADD);
 352        return 1;
 353}
 354
 355static int keep_entry(struct cache_entry *ce)
 356{
 357        add_cache_entry(ce, ADD_CACHE_OK_TO_ADD);
 358        return 1;
 359}
 360
 #if DBRT_DEBUG
 /*
  * Debug helper: print one line describing "ce" (mode, sha1, stage, name)
  * to stream "o", prefixed with "label".  A NULL entry is reported as
  * "(missing)".
  */
 static void show_stage_entry(FILE *o,
                              const char *label, const struct cache_entry *ce)
 {
         if (ce) {
                 fprintf(o, "%s%06o %s %d\t%s\n",
                         label,
                         ntohl(ce->ce_mode),
                         sha1_to_hex(ce->sha1),
                         ce_stage(ce),
                         ce->name);
                 return;
         }
         fprintf(o, "%s (missing)\n", label);
 }
 #endif
 376
 377static int threeway_merge(struct cache_entry **stages)
 378{
 379        struct cache_entry *index;
 380        struct cache_entry *head; 
 381        struct cache_entry *remote = stages[head_idx + 1];
 382        int count;
 383        int head_match = 0;
 384        int remote_match = 0;
 385
 386        int df_conflict_head = 0;
 387        int df_conflict_remote = 0;
 388
 389        int any_anc_missing = 0;
 390        int i;
 391
 392        for (i = 1; i < head_idx; i++) {
 393                if (!stages[i])
 394                        any_anc_missing = 1;
 395        }
 396
 397        index = stages[0];
 398        head = stages[head_idx];
 399
 400        if (head == &df_conflict_entry) {
 401                df_conflict_head = 1;
 402                head = NULL;
 403        }
 404
 405        if (remote == &df_conflict_entry) {
 406                df_conflict_remote = 1;
 407                remote = NULL;
 408        }
 409
 410        /* First, if there's a #16 situation, note that to prevent #13
 411         * and #14. 
 412         */
 413        if (!same(remote, head)) {
 414                for (i = 1; i < head_idx; i++) {
 415                        if (same(stages[i], head)) {
 416                                head_match = i;
 417                        }
 418                        if (same(stages[i], remote)) {
 419                                remote_match = i;
 420                        }
 421                }
 422        }
 423
 424        /* We start with cases where the index is allowed to match
 425         * something other than the head: #14(ALT) and #2ALT, where it
 426         * is permitted to match the result instead.
 427         */
 428        /* #14, #14ALT, #2ALT */
 429        if (remote && !df_conflict_head && head_match && !remote_match) {
 430                if (index && !same(index, remote) && !same(index, head))
 431                        reject_merge(index);
 432                return merged_entry(remote, index);
 433        }
 434        /*
 435         * If we have an entry in the index cache, then we want to
 436         * make sure that it matches head.
 437         */
 438        if (index && !same(index, head)) {
 439                reject_merge(index);
 440        }
 441
 442        if (head) {
 443                /* #5ALT, #15 */
 444                if (same(head, remote))
 445                        return merged_entry(head, index);
 446                /* #13, #3ALT */
 447                if (!df_conflict_remote && remote_match && !head_match)
 448                        return merged_entry(head, index);
 449        }
 450
 451        /* #1 */
 452        if (!head && !remote && any_anc_missing)
 453                return 0;
 454
 455        /* Below are "no merge" cases, which require that the index be
 456         * up-to-date to avoid the files getting overwritten with
 457         * conflict resolution files. 
 458         */
 459        if (index) {
 460                verify_uptodate(index);
 461        }
 462
 463        /* #2, #3, #4, #6, #7, #9, #11. */
 464        count = 0;
 465        if (!head_match || !remote_match) {
 466                for (i = 1; i < head_idx; i++) {
 467                        if (stages[i]) {
 468                                keep_entry(stages[i]);
 469                                count++;
 470                                break;
 471                        }
 472                }
 473        }
 474#if DBRT_DEBUG
 475        else {
 476                fprintf(stderr, "read-tree: warning #16 detected\n");
 477                show_stage_entry(stderr, "head   ", stages[head_match]);
 478                show_stage_entry(stderr, "remote ", stages[remote_match]);
 479        }
 480#endif
 481        if (head) { count += keep_entry(head); }
 482        if (remote) { count += keep_entry(remote); }
 483        return count;
 484}
 485
 486/*
 487 * Two-way merge.
 488 *
 489 * The rule is to "carry forward" what is in the index without losing
 490 * information across a "fast forward", favoring a successful merge
 491 * over a merge failure when it makes sense.  For details of the
 492 * "carry forward" rule, please see <Documentation/git-read-tree.txt>.
 493 *
 494 */
 495static int twoway_merge(struct cache_entry **src)
 496{
 497        struct cache_entry *current = src[0];
 498        struct cache_entry *oldtree = src[1], *newtree = src[2];
 499
 500        if (merge_size != 2)
 501                return error("Cannot do a twoway merge of %d trees\n",
 502                             merge_size);
 503
 504        if (current) {
 505                if ((!oldtree && !newtree) || /* 4 and 5 */
 506                    (!oldtree && newtree &&
 507                     same(current, newtree)) || /* 6 and 7 */
 508                    (oldtree && newtree &&
 509                     same(oldtree, newtree)) || /* 14 and 15 */
 510                    (oldtree && newtree &&
 511                     !same(oldtree, newtree) && /* 18 and 19*/
 512                     same(current, newtree))) {
 513                        return keep_entry(current);
 514                }
 515                else if (oldtree && !newtree && same(current, oldtree)) {
 516                        /* 10 or 11 */
 517                        return deleted_entry(oldtree, current);
 518                }
 519                else if (oldtree && newtree &&
 520                         same(current, oldtree) && !same(current, newtree)) {
 521                        /* 20 or 21 */
 522                        return merged_entry(newtree, current);
 523                }
 524                else {
 525                        /* all other failures */
 526                        if (oldtree)
 527                                reject_merge(oldtree);
 528                        if (current)
 529                                reject_merge(current);
 530                        if (newtree)
 531                                reject_merge(newtree);
 532                        return -1;
 533                }
 534        }
 535        else if (newtree)
 536                return merged_entry(newtree, current);
 537        else
 538                return deleted_entry(oldtree, current);
 539}
 540
 541/*
 542 * One-way merge.
 543 *
 544 * The rule is:
 545 * - take the stat information from stage0, take the data from stage1
 546 */
 547static int oneway_merge(struct cache_entry **src)
 548{
 549        struct cache_entry *old = src[0];
 550        struct cache_entry *a = src[1];
 551
 552        if (merge_size != 1)
 553                return error("Cannot do a oneway merge of %d trees\n",
 554                             merge_size);
 555
 556        if (!a)
 557                return 0;
 558        if (old && same(old, a)) {
 559                return keep_entry(old);
 560        }
 561        return merged_entry(a, NULL);
 562}
 563
 564static int read_cache_unmerged(void)
 565{
 566        int i, deleted;
 567        struct cache_entry **dst;
 568
 569        read_cache();
 570        dst = active_cache;
 571        deleted = 0;
 572        for (i = 0; i < active_nr; i++) {
 573                struct cache_entry *ce = active_cache[i];
 574                if (ce_stage(ce)) {
 575                        deleted++;
 576                        continue;
 577                }
 578                if (deleted)
 579                        *dst = ce;
 580                dst++;
 581        }
 582        active_nr -= deleted;
 583        return deleted;
 584}
 585
 586static const char read_tree_usage[] = "git-read-tree (<sha> | -m [-u | -i] <sha1> [<sha2> [<sha3>]])";
 587
 588static struct cache_file cache_file;
 589
 590int main(int argc, char **argv)
 591{
 592        int i, newfd, reset, stage = 0;
 593        unsigned char sha1[20];
 594        merge_fn_t fn = NULL;
 595
 596        newfd = hold_index_file_for_update(&cache_file, get_index_file());
 597        if (newfd < 0)
 598                die("unable to create new cachefile");
 599
 600        merge = 0;
 601        reset = 0;
 602        for (i = 1; i < argc; i++) {
 603                const char *arg = argv[i];
 604
 605                /* "-u" means "update", meaning that a merge will update
 606                 * the working tree.
 607                 */
 608                if (!strcmp(arg, "-u")) {
 609                        update = 1;
 610                        continue;
 611                }
 612
 613                /* "-i" means "index only", meaning that a merge will
 614                 * not even look at the working tree.
 615                 */
 616                if (!strcmp(arg, "-i")) {
 617                        index_only = 1;
 618                        continue;
 619                }
 620
 621                /* This differs from "-m" in that we'll silently ignore unmerged entries */
 622                if (!strcmp(arg, "--reset")) {
 623                        if (stage || merge)
 624                                usage(read_tree_usage);
 625                        reset = 1;
 626                        merge = 1;
 627                        stage = 1;
 628                        read_cache_unmerged();
 629                        continue;
 630                }
 631
 632                if (!strcmp(arg, "--head")) {
 633                        head_idx = stage - 1;
 634                        fn = threeway_merge;
 635                }
 636
 637                /* "-m" stands for "merge", meaning we start in stage 1 */
 638                if (!strcmp(arg, "-m")) {
 639                        if (stage || merge)
 640                                usage(read_tree_usage);
 641                        if (read_cache_unmerged())
 642                                die("you need to resolve your current index first");
 643                        stage = 1;
 644                        merge = 1;
 645                        continue;
 646                }
 647
 648                /* using -u and -i at the same time makes no sense */
 649                if (1 < index_only + update)
 650                        usage(read_tree_usage);
 651
 652                if (get_sha1(arg, sha1) < 0)
 653                        usage(read_tree_usage);
 654                if (list_tree(sha1) < 0)
 655                        die("failed to unpack tree object %s", arg);
 656                stage++;
 657        }
 658        if (update && !merge)
 659                usage(read_tree_usage);
 660        if (merge && !fn) {
 661                if (stage < 2)
 662                        die("just how do you expect me to merge %d trees?", stage-1);
 663                switch (stage - 1) {
 664                case 1:
 665                        fn = oneway_merge;
 666                        break;
 667                case 2:
 668                        fn = twoway_merge;
 669                        break;
 670                case 3:
 671                        fn = threeway_merge;
 672                        break;
 673                default:
 674                        fn = threeway_merge;
 675                        break;
 676                }
 677        }
 678
 679        if (head_idx < 0) {
 680                if (stage - 1 >= 3)
 681                        head_idx = stage - 2;
 682                else
 683                        head_idx = 1;
 684        }
 685
 686        unpack_trees(fn);
 687        if (write_cache(newfd, active_cache, active_nr) ||
 688            commit_index_file(&cache_file))
 689                die("unable to write new index file");
 690        return 0;
 691}