/* builtin/fast-export.c on commit "fast-export: Fix output order of D/F changes" (060df62) */
   1/*
   2 * "git fast-export" builtin command
   3 *
   4 * Copyright (C) 2007 Johannes E. Schindelin
   5 */
   6#include "builtin.h"
   7#include "cache.h"
   8#include "commit.h"
   9#include "object.h"
  10#include "tag.h"
  11#include "diff.h"
  12#include "diffcore.h"
  13#include "log-tree.h"
  14#include "revision.h"
  15#include "decorate.h"
  16#include "string-list.h"
  17#include "utf8.h"
  18#include "parse-options.h"
  19
  20static const char *fast_export_usage[] = {
  21        "git fast-export [rev-list-opts]",
  22        NULL
  23};
  24
  25static int progress;
  26static enum { ABORT, VERBATIM, WARN, STRIP } signed_tag_mode = ABORT;
  27static enum { ERROR, DROP, REWRITE } tag_of_filtered_mode = ABORT;
  28static int fake_missing_tagger;
  29static int no_data;
  30
  31static int parse_opt_signed_tag_mode(const struct option *opt,
  32                                     const char *arg, int unset)
  33{
  34        if (unset || !strcmp(arg, "abort"))
  35                signed_tag_mode = ABORT;
  36        else if (!strcmp(arg, "verbatim") || !strcmp(arg, "ignore"))
  37                signed_tag_mode = VERBATIM;
  38        else if (!strcmp(arg, "warn"))
  39                signed_tag_mode = WARN;
  40        else if (!strcmp(arg, "strip"))
  41                signed_tag_mode = STRIP;
  42        else
  43                return error("Unknown signed-tag mode: %s", arg);
  44        return 0;
  45}
  46
  47static int parse_opt_tag_of_filtered_mode(const struct option *opt,
  48                                          const char *arg, int unset)
  49{
  50        if (unset || !strcmp(arg, "abort"))
  51                tag_of_filtered_mode = ABORT;
  52        else if (!strcmp(arg, "drop"))
  53                tag_of_filtered_mode = DROP;
  54        else if (!strcmp(arg, "rewrite"))
  55                tag_of_filtered_mode = REWRITE;
  56        else
  57                return error("Unknown tag-of-filtered mode: %s", arg);
  58        return 0;
  59}
  60
  61static struct decoration idnums;
  62static uint32_t last_idnum;
  63
  64static int has_unshown_parent(struct commit *commit)
  65{
  66        struct commit_list *parent;
  67
  68        for (parent = commit->parents; parent; parent = parent->next)
  69                if (!(parent->item->object.flags & SHOWN) &&
  70                    !(parent->item->object.flags & UNINTERESTING))
  71                        return 1;
  72        return 0;
  73}
  74
  75/* Since intptr_t is C99, we do not use it here */
  76static inline uint32_t *mark_to_ptr(uint32_t mark)
  77{
  78        return ((uint32_t *)NULL) + mark;
  79}
  80
  81static inline uint32_t ptr_to_mark(void * mark)
  82{
  83        return (uint32_t *)mark - (uint32_t *)NULL;
  84}
  85
  86static inline void mark_object(struct object *object, uint32_t mark)
  87{
  88        add_decoration(&idnums, object, mark_to_ptr(mark));
  89}
  90
  91static inline void mark_next_object(struct object *object)
  92{
  93        mark_object(object, ++last_idnum);
  94}
  95
  96static int get_object_mark(struct object *object)
  97{
  98        void *decoration = lookup_decoration(&idnums, object);
  99        if (!decoration)
 100                return 0;
 101        return ptr_to_mark(decoration);
 102}
 103
 104static void show_progress(void)
 105{
 106        static int counter = 0;
 107        if (!progress)
 108                return;
 109        if ((++counter % progress) == 0)
 110                printf("progress %d objects\n", counter);
 111}
 112
 113static void handle_object(const unsigned char *sha1)
 114{
 115        unsigned long size;
 116        enum object_type type;
 117        char *buf;
 118        struct object *object;
 119
 120        if (no_data)
 121                return;
 122
 123        if (is_null_sha1(sha1))
 124                return;
 125
 126        object = parse_object(sha1);
 127        if (!object)
 128                die ("Could not read blob %s", sha1_to_hex(sha1));
 129
 130        if (object->flags & SHOWN)
 131                return;
 132
 133        buf = read_sha1_file(sha1, &type, &size);
 134        if (!buf)
 135                die ("Could not read blob %s", sha1_to_hex(sha1));
 136
 137        mark_next_object(object);
 138
 139        printf("blob\nmark :%"PRIu32"\ndata %lu\n", last_idnum, size);
 140        if (size && fwrite(buf, size, 1, stdout) != 1)
 141                die_errno ("Could not write blob '%s'", sha1_to_hex(sha1));
 142        printf("\n");
 143
 144        show_progress();
 145
 146        object->flags |= SHOWN;
 147        free(buf);
 148}
 149
 150static int depth_first(const void *a_, const void *b_)
 151{
 152        const struct diff_filepair *a = *((const struct diff_filepair **)a_);
 153        const struct diff_filepair *b = *((const struct diff_filepair **)b_);
 154        const char *name_a, *name_b;
 155        int len_a, len_b, len;
 156        int cmp;
 157
 158        name_a = a->one ? a->one->path : a->two->path;
 159        name_b = b->one ? b->one->path : b->two->path;
 160
 161        len_a = strlen(name_a);
 162        len_b = strlen(name_b);
 163        len = (len_a < len_b) ? len_a : len_b;
 164
 165        /* strcmp will sort 'd' before 'd/e', we want 'd/e' before 'd' */
 166        cmp = memcmp(name_a, name_b, len);
 167        if (cmp)
 168                return cmp;
 169        return (len_b - len_a);
 170}
 171
 172static void show_filemodify(struct diff_queue_struct *q,
 173                            struct diff_options *options, void *data)
 174{
 175        int i;
 176
 177        /*
 178         * Handle files below a directory first, in case they are all deleted
 179         * and the directory changes to a file or symlink.
 180         */
 181        qsort(q->queue, q->nr, sizeof(q->queue[0]), depth_first);
 182
 183        for (i = 0; i < q->nr; i++) {
 184                struct diff_filespec *ospec = q->queue[i]->one;
 185                struct diff_filespec *spec = q->queue[i]->two;
 186
 187                switch (q->queue[i]->status) {
 188                case DIFF_STATUS_DELETED:
 189                        printf("D %s\n", spec->path);
 190                        break;
 191
 192                case DIFF_STATUS_COPIED:
 193                case DIFF_STATUS_RENAMED:
 194                        printf("%c \"%s\" \"%s\"\n", q->queue[i]->status,
 195                               ospec->path, spec->path);
 196
 197                        if (!hashcmp(ospec->sha1, spec->sha1) &&
 198                            ospec->mode == spec->mode)
 199                                break;
 200                        /* fallthrough */
 201
 202                case DIFF_STATUS_TYPE_CHANGED:
 203                case DIFF_STATUS_MODIFIED:
 204                case DIFF_STATUS_ADDED:
 205                        /*
 206                         * Links refer to objects in another repositories;
 207                         * output the SHA-1 verbatim.
 208                         */
 209                        if (no_data || S_ISGITLINK(spec->mode))
 210                                printf("M %06o %s %s\n", spec->mode,
 211                                       sha1_to_hex(spec->sha1), spec->path);
 212                        else {
 213                                struct object *object = lookup_object(spec->sha1);
 214                                printf("M %06o :%d %s\n", spec->mode,
 215                                       get_object_mark(object), spec->path);
 216                        }
 217                        break;
 218
 219                default:
 220                        die("Unexpected comparison status '%c' for %s, %s",
 221                                q->queue[i]->status,
 222                                ospec->path ? ospec->path : "none",
 223                                spec->path ? spec->path : "none");
 224                }
 225        }
 226}
 227
 228static const char *find_encoding(const char *begin, const char *end)
 229{
 230        const char *needle = "\nencoding ";
 231        char *bol, *eol;
 232
 233        bol = memmem(begin, end ? end - begin : strlen(begin),
 234                     needle, strlen(needle));
 235        if (!bol)
 236                return git_commit_encoding;
 237        bol += strlen(needle);
 238        eol = strchrnul(bol, '\n');
 239        *eol = '\0';
 240        return bol;
 241}
 242
 243static void handle_commit(struct commit *commit, struct rev_info *rev)
 244{
 245        int saved_output_format = rev->diffopt.output_format;
 246        const char *author, *author_end, *committer, *committer_end;
 247        const char *encoding, *message;
 248        char *reencoded = NULL;
 249        struct commit_list *p;
 250        int i;
 251
 252        rev->diffopt.output_format = DIFF_FORMAT_CALLBACK;
 253
 254        parse_commit(commit);
 255        author = strstr(commit->buffer, "\nauthor ");
 256        if (!author)
 257                die ("Could not find author in commit %s",
 258                     sha1_to_hex(commit->object.sha1));
 259        author++;
 260        author_end = strchrnul(author, '\n');
 261        committer = strstr(author_end, "\ncommitter ");
 262        if (!committer)
 263                die ("Could not find committer in commit %s",
 264                     sha1_to_hex(commit->object.sha1));
 265        committer++;
 266        committer_end = strchrnul(committer, '\n');
 267        message = strstr(committer_end, "\n\n");
 268        encoding = find_encoding(committer_end, message);
 269        if (message)
 270                message += 2;
 271
 272        if (commit->parents &&
 273            get_object_mark(&commit->parents->item->object) != 0) {
 274                parse_commit(commit->parents->item);
 275                diff_tree_sha1(commit->parents->item->tree->object.sha1,
 276                               commit->tree->object.sha1, "", &rev->diffopt);
 277        }
 278        else
 279                diff_root_tree_sha1(commit->tree->object.sha1,
 280                                    "", &rev->diffopt);
 281
 282        /* Export the referenced blobs, and remember the marks. */
 283        for (i = 0; i < diff_queued_diff.nr; i++)
 284                if (!S_ISGITLINK(diff_queued_diff.queue[i]->two->mode))
 285                        handle_object(diff_queued_diff.queue[i]->two->sha1);
 286
 287        mark_next_object(&commit->object);
 288        if (!is_encoding_utf8(encoding))
 289                reencoded = reencode_string(message, "UTF-8", encoding);
 290        if (!commit->parents)
 291                printf("reset %s\n", (const char*)commit->util);
 292        printf("commit %s\nmark :%"PRIu32"\n%.*s\n%.*s\ndata %u\n%s",
 293               (const char *)commit->util, last_idnum,
 294               (int)(author_end - author), author,
 295               (int)(committer_end - committer), committer,
 296               (unsigned)(reencoded
 297                          ? strlen(reencoded) : message
 298                          ? strlen(message) : 0),
 299               reencoded ? reencoded : message ? message : "");
 300        free(reencoded);
 301
 302        for (i = 0, p = commit->parents; p; p = p->next) {
 303                int mark = get_object_mark(&p->item->object);
 304                if (!mark)
 305                        continue;
 306                if (i == 0)
 307                        printf("from :%d\n", mark);
 308                else
 309                        printf("merge :%d\n", mark);
 310                i++;
 311        }
 312
 313        log_tree_diff_flush(rev);
 314        rev->diffopt.output_format = saved_output_format;
 315
 316        printf("\n");
 317
 318        show_progress();
 319}
 320
 321static void handle_tail(struct object_array *commits, struct rev_info *revs)
 322{
 323        struct commit *commit;
 324        while (commits->nr) {
 325                commit = (struct commit *)commits->objects[commits->nr - 1].item;
 326                if (has_unshown_parent(commit))
 327                        return;
 328                handle_commit(commit, revs);
 329                commits->nr--;
 330        }
 331}
 332
 333static void handle_tag(const char *name, struct tag *tag)
 334{
 335        unsigned long size;
 336        enum object_type type;
 337        char *buf;
 338        const char *tagger, *tagger_end, *message;
 339        size_t message_size = 0;
 340        struct object *tagged;
 341        int tagged_mark;
 342        struct commit *p;
 343
 344        /* Trees have no identifer in fast-export output, thus we have no way
 345         * to output tags of trees, tags of tags of trees, etc.  Simply omit
 346         * such tags.
 347         */
 348        tagged = tag->tagged;
 349        while (tagged->type == OBJ_TAG) {
 350                tagged = ((struct tag *)tagged)->tagged;
 351        }
 352        if (tagged->type == OBJ_TREE) {
 353                warning("Omitting tag %s,\nsince tags of trees (or tags of tags of trees, etc.) are not supported.",
 354                        sha1_to_hex(tag->object.sha1));
 355                return;
 356        }
 357
 358        buf = read_sha1_file(tag->object.sha1, &type, &size);
 359        if (!buf)
 360                die ("Could not read tag %s", sha1_to_hex(tag->object.sha1));
 361        message = memmem(buf, size, "\n\n", 2);
 362        if (message) {
 363                message += 2;
 364                message_size = strlen(message);
 365        }
 366        tagger = memmem(buf, message ? message - buf : size, "\ntagger ", 8);
 367        if (!tagger) {
 368                if (fake_missing_tagger)
 369                        tagger = "tagger Unspecified Tagger "
 370                                "<unspecified-tagger> 0 +0000";
 371                else
 372                        tagger = "";
 373                tagger_end = tagger + strlen(tagger);
 374        } else {
 375                tagger++;
 376                tagger_end = strchrnul(tagger, '\n');
 377        }
 378
 379        /* handle signed tags */
 380        if (message) {
 381                const char *signature = strstr(message,
 382                                               "\n-----BEGIN PGP SIGNATURE-----\n");
 383                if (signature)
 384                        switch(signed_tag_mode) {
 385                        case ABORT:
 386                                die ("Encountered signed tag %s; use "
 387                                     "--signed-tag=<mode> to handle it.",
 388                                     sha1_to_hex(tag->object.sha1));
 389                        case WARN:
 390                                warning ("Exporting signed tag %s",
 391                                         sha1_to_hex(tag->object.sha1));
 392                                /* fallthru */
 393                        case VERBATIM:
 394                                break;
 395                        case STRIP:
 396                                message_size = signature + 1 - message;
 397                                break;
 398                        }
 399        }
 400
 401        /* handle tag->tagged having been filtered out due to paths specified */
 402        tagged = tag->tagged;
 403        tagged_mark = get_object_mark(tagged);
 404        if (!tagged_mark) {
 405                switch(tag_of_filtered_mode) {
 406                case ABORT:
 407                        die ("Tag %s tags unexported object; use "
 408                             "--tag-of-filtered-object=<mode> to handle it.",
 409                             sha1_to_hex(tag->object.sha1));
 410                case DROP:
 411                        /* Ignore this tag altogether */
 412                        return;
 413                case REWRITE:
 414                        if (tagged->type != OBJ_COMMIT) {
 415                                die ("Tag %s tags unexported %s!",
 416                                     sha1_to_hex(tag->object.sha1),
 417                                     typename(tagged->type));
 418                        }
 419                        p = (struct commit *)tagged;
 420                        for (;;) {
 421                                if (p->parents && p->parents->next)
 422                                        break;
 423                                if (p->object.flags & UNINTERESTING)
 424                                        break;
 425                                if (!(p->object.flags & TREESAME))
 426                                        break;
 427                                if (!p->parents)
 428                                        die ("Can't find replacement commit for tag %s\n",
 429                                             sha1_to_hex(tag->object.sha1));
 430                                p = p->parents->item;
 431                        }
 432                        tagged_mark = get_object_mark(&p->object);
 433                }
 434        }
 435
 436        if (!prefixcmp(name, "refs/tags/"))
 437                name += 10;
 438        printf("tag %s\nfrom :%d\n%.*s%sdata %d\n%.*s\n",
 439               name, tagged_mark,
 440               (int)(tagger_end - tagger), tagger,
 441               tagger == tagger_end ? "" : "\n",
 442               (int)message_size, (int)message_size, message ? message : "");
 443}
 444
 445static void get_tags_and_duplicates(struct object_array *pending,
 446                                    struct string_list *extra_refs)
 447{
 448        struct tag *tag;
 449        int i;
 450
 451        for (i = 0; i < pending->nr; i++) {
 452                struct object_array_entry *e = pending->objects + i;
 453                unsigned char sha1[20];
 454                struct commit *commit = commit;
 455                char *full_name;
 456
 457                if (dwim_ref(e->name, strlen(e->name), sha1, &full_name) != 1)
 458                        continue;
 459
 460                switch (e->item->type) {
 461                case OBJ_COMMIT:
 462                        commit = (struct commit *)e->item;
 463                        break;
 464                case OBJ_TAG:
 465                        tag = (struct tag *)e->item;
 466
 467                        /* handle nested tags */
 468                        while (tag && tag->object.type == OBJ_TAG) {
 469                                parse_object(tag->object.sha1);
 470                                string_list_append(full_name, extra_refs)->util = tag;
 471                                tag = (struct tag *)tag->tagged;
 472                        }
 473                        if (!tag)
 474                                die ("Tag %s points nowhere?", e->name);
 475                        switch(tag->object.type) {
 476                        case OBJ_COMMIT:
 477                                commit = (struct commit *)tag;
 478                                break;
 479                        case OBJ_BLOB:
 480                                handle_object(tag->object.sha1);
 481                                continue;
 482                        default: /* OBJ_TAG (nested tags) is already handled */
 483                                warning("Tag points to object of unexpected type %s, skipping.",
 484                                        typename(tag->object.type));
 485                                continue;
 486                        }
 487                        break;
 488                default:
 489                        warning("%s: Unexpected object of type %s, skipping.",
 490                                e->name,
 491                                typename(e->item->type));
 492                        continue;
 493                }
 494                if (commit->util)
 495                        /* more than one name for the same object */
 496                        string_list_append(full_name, extra_refs)->util = commit;
 497                else
 498                        commit->util = full_name;
 499        }
 500}
 501
 502static void handle_tags_and_duplicates(struct string_list *extra_refs)
 503{
 504        struct commit *commit;
 505        int i;
 506
 507        for (i = extra_refs->nr - 1; i >= 0; i--) {
 508                const char *name = extra_refs->items[i].string;
 509                struct object *object = extra_refs->items[i].util;
 510                switch (object->type) {
 511                case OBJ_TAG:
 512                        handle_tag(name, (struct tag *)object);
 513                        break;
 514                case OBJ_COMMIT:
 515                        /* create refs pointing to already seen commits */
 516                        commit = (struct commit *)object;
 517                        printf("reset %s\nfrom :%d\n\n", name,
 518                               get_object_mark(&commit->object));
 519                        show_progress();
 520                        break;
 521                }
 522        }
 523}
 524
 525static void export_marks(char *file)
 526{
 527        unsigned int i;
 528        uint32_t mark;
 529        struct object_decoration *deco = idnums.hash;
 530        FILE *f;
 531        int e = 0;
 532
 533        f = fopen(file, "w");
 534        if (!f)
 535                die_errno("Unable to open marks file %s for writing.", file);
 536
 537        for (i = 0; i < idnums.size; i++) {
 538                if (deco->base && deco->base->type == 1) {
 539                        mark = ptr_to_mark(deco->decoration);
 540                        if (fprintf(f, ":%"PRIu32" %s\n", mark,
 541                                sha1_to_hex(deco->base->sha1)) < 0) {
 542                            e = 1;
 543                            break;
 544                        }
 545                }
 546                deco++;
 547        }
 548
 549        e |= ferror(f);
 550        e |= fclose(f);
 551        if (e)
 552                error("Unable to write marks file %s.", file);
 553}
 554
 555static void import_marks(char *input_file)
 556{
 557        char line[512];
 558        FILE *f = fopen(input_file, "r");
 559        if (!f)
 560                die_errno("cannot read '%s'", input_file);
 561
 562        while (fgets(line, sizeof(line), f)) {
 563                uint32_t mark;
 564                char *line_end, *mark_end;
 565                unsigned char sha1[20];
 566                struct object *object;
 567
 568                line_end = strchr(line, '\n');
 569                if (line[0] != ':' || !line_end)
 570                        die("corrupt mark line: %s", line);
 571                *line_end = '\0';
 572
 573                mark = strtoumax(line + 1, &mark_end, 10);
 574                if (!mark || mark_end == line + 1
 575                        || *mark_end != ' ' || get_sha1(mark_end + 1, sha1))
 576                        die("corrupt mark line: %s", line);
 577
 578                object = parse_object(sha1);
 579                if (!object)
 580                        die ("Could not read blob %s", sha1_to_hex(sha1));
 581
 582                if (object->flags & SHOWN)
 583                        error("Object %s already has a mark", sha1);
 584
 585                mark_object(object, mark);
 586                if (last_idnum < mark)
 587                        last_idnum = mark;
 588
 589                object->flags |= SHOWN;
 590        }
 591        fclose(f);
 592}
 593
 594int cmd_fast_export(int argc, const char **argv, const char *prefix)
 595{
 596        struct rev_info revs;
 597        struct object_array commits = { 0, 0, NULL };
 598        struct string_list extra_refs = { NULL, 0, 0, 0 };
 599        struct commit *commit;
 600        char *export_filename = NULL, *import_filename = NULL;
 601        struct option options[] = {
 602                OPT_INTEGER(0, "progress", &progress,
 603                            "show progress after <n> objects"),
 604                OPT_CALLBACK(0, "signed-tags", &signed_tag_mode, "mode",
 605                             "select handling of signed tags",
 606                             parse_opt_signed_tag_mode),
 607                OPT_CALLBACK(0, "tag-of-filtered-object", &tag_of_filtered_mode, "mode",
 608                             "select handling of tags that tag filtered objects",
 609                             parse_opt_tag_of_filtered_mode),
 610                OPT_STRING(0, "export-marks", &export_filename, "FILE",
 611                             "Dump marks to this file"),
 612                OPT_STRING(0, "import-marks", &import_filename, "FILE",
 613                             "Import marks from this file"),
 614                OPT_BOOLEAN(0, "fake-missing-tagger", &fake_missing_tagger,
 615                             "Fake a tagger when tags lack one"),
 616                { OPTION_NEGBIT, 0, "data", &no_data, NULL,
 617                        "Skip output of blob data",
 618                        PARSE_OPT_NOARG | PARSE_OPT_NEGHELP, NULL, 1 },
 619                OPT_END()
 620        };
 621
 622        if (argc == 1)
 623                usage_with_options (fast_export_usage, options);
 624
 625        /* we handle encodings */
 626        git_config(git_default_config, NULL);
 627
 628        init_revisions(&revs, prefix);
 629        revs.topo_order = 1;
 630        revs.show_source = 1;
 631        revs.rewrite_parents = 1;
 632        argc = setup_revisions(argc, argv, &revs, NULL);
 633        argc = parse_options(argc, argv, prefix, options, fast_export_usage, 0);
 634        if (argc > 1)
 635                usage_with_options (fast_export_usage, options);
 636
 637        if (import_filename)
 638                import_marks(import_filename);
 639
 640        get_tags_and_duplicates(&revs.pending, &extra_refs);
 641
 642        if (prepare_revision_walk(&revs))
 643                die("revision walk setup failed");
 644        revs.diffopt.format_callback = show_filemodify;
 645        DIFF_OPT_SET(&revs.diffopt, RECURSIVE);
 646        while ((commit = get_revision(&revs))) {
 647                if (has_unshown_parent(commit)) {
 648                        add_object_array(&commit->object, NULL, &commits);
 649                }
 650                else {
 651                        handle_commit(commit, &revs);
 652                        handle_tail(&commits, &revs);
 653                }
 654        }
 655
 656        handle_tags_and_duplicates(&extra_refs);
 657
 658        if (export_filename)
 659                export_marks(export_filename);
 660
 661        return 0;
 662}