builtin / fast-export.con commit Merge branch 'jc/maint-blame-minimal' into maint (089c0ca)
   1/*
   2 * "git fast-export" builtin command
   3 *
   4 * Copyright (C) 2007 Johannes E. Schindelin
   5 */
   6#include "builtin.h"
   7#include "cache.h"
   8#include "commit.h"
   9#include "object.h"
  10#include "tag.h"
  11#include "diff.h"
  12#include "diffcore.h"
  13#include "log-tree.h"
  14#include "revision.h"
  15#include "decorate.h"
  16#include "string-list.h"
  17#include "utf8.h"
  18#include "parse-options.h"
  19#include "quote.h"
  20
/*
 * Usage lines handed to usage_with_options() and parse_options();
 * the array is NULL-terminated.
 */
static const char *fast_export_usage[] = {
        "git fast-export [rev-list-opts]",
        NULL
};
  25
/* When non-zero, a "progress" line is printed every <progress> objects. */
static int progress;
/* Handling of tags that carry a PGP signature (--signed-tags=<mode>). */
static enum { ABORT, VERBATIM, WARN, STRIP } signed_tag_mode = ABORT;
/* Handling of tags whose target has no mark (--tag-of-filtered-object=<mode>). */
static enum { ERROR, DROP, REWRITE } tag_of_filtered_mode = ERROR;
/* Substitute a placeholder tagger line for tags that have none. */
static int fake_missing_tagger;
/* Bracket the stream with "feature done" and "done". */
static int use_done_feature;
/* Skip blob output; file entries then name the SHA-1 instead of a mark. */
static int no_data;
/* Emit "deleteall" plus the whole tree for each commit instead of a diff. */
static int full_tree;
  33
  34static int parse_opt_signed_tag_mode(const struct option *opt,
  35                                     const char *arg, int unset)
  36{
  37        if (unset || !strcmp(arg, "abort"))
  38                signed_tag_mode = ABORT;
  39        else if (!strcmp(arg, "verbatim") || !strcmp(arg, "ignore"))
  40                signed_tag_mode = VERBATIM;
  41        else if (!strcmp(arg, "warn"))
  42                signed_tag_mode = WARN;
  43        else if (!strcmp(arg, "strip"))
  44                signed_tag_mode = STRIP;
  45        else
  46                return error("Unknown signed-tag mode: %s", arg);
  47        return 0;
  48}
  49
  50static int parse_opt_tag_of_filtered_mode(const struct option *opt,
  51                                          const char *arg, int unset)
  52{
  53        if (unset || !strcmp(arg, "abort"))
  54                tag_of_filtered_mode = ERROR;
  55        else if (!strcmp(arg, "drop"))
  56                tag_of_filtered_mode = DROP;
  57        else if (!strcmp(arg, "rewrite"))
  58                tag_of_filtered_mode = REWRITE;
  59        else
  60                return error("Unknown tag-of-filtered mode: %s", arg);
  61        return 0;
  62}
  63
/* Object -> mark table; the mark is stored encoded as a pointer value. */
static struct decoration idnums;
/* Highest mark number assigned or imported so far. */
static uint32_t last_idnum;
  66
  67static int has_unshown_parent(struct commit *commit)
  68{
  69        struct commit_list *parent;
  70
  71        for (parent = commit->parents; parent; parent = parent->next)
  72                if (!(parent->item->object.flags & SHOWN) &&
  73                    !(parent->item->object.flags & UNINTERESTING))
  74                        return 1;
  75        return 0;
  76}
  77
  78/* Since intptr_t is C99, we do not use it here */
  79static inline uint32_t *mark_to_ptr(uint32_t mark)
  80{
  81        return ((uint32_t *)NULL) + mark;
  82}
  83
  84static inline uint32_t ptr_to_mark(void * mark)
  85{
  86        return (uint32_t *)mark - (uint32_t *)NULL;
  87}
  88
  89static inline void mark_object(struct object *object, uint32_t mark)
  90{
  91        add_decoration(&idnums, object, mark_to_ptr(mark));
  92}
  93
  94static inline void mark_next_object(struct object *object)
  95{
  96        mark_object(object, ++last_idnum);
  97}
  98
  99static int get_object_mark(struct object *object)
 100{
 101        void *decoration = lookup_decoration(&idnums, object);
 102        if (!decoration)
 103                return 0;
 104        return ptr_to_mark(decoration);
 105}
 106
 107static void show_progress(void)
 108{
 109        static int counter = 0;
 110        if (!progress)
 111                return;
 112        if ((++counter % progress) == 0)
 113                printf("progress %d objects\n", counter);
 114}
 115
 116static void handle_object(const unsigned char *sha1)
 117{
 118        unsigned long size;
 119        enum object_type type;
 120        char *buf;
 121        struct object *object;
 122
 123        if (no_data)
 124                return;
 125
 126        if (is_null_sha1(sha1))
 127                return;
 128
 129        object = parse_object(sha1);
 130        if (!object)
 131                die ("Could not read blob %s", sha1_to_hex(sha1));
 132
 133        if (object->flags & SHOWN)
 134                return;
 135
 136        buf = read_sha1_file(sha1, &type, &size);
 137        if (!buf)
 138                die ("Could not read blob %s", sha1_to_hex(sha1));
 139
 140        mark_next_object(object);
 141
 142        printf("blob\nmark :%"PRIu32"\ndata %lu\n", last_idnum, size);
 143        if (size && fwrite(buf, size, 1, stdout) != 1)
 144                die_errno ("Could not write blob '%s'", sha1_to_hex(sha1));
 145        printf("\n");
 146
 147        show_progress();
 148
 149        object->flags |= SHOWN;
 150        free(buf);
 151}
 152
 153static int depth_first(const void *a_, const void *b_)
 154{
 155        const struct diff_filepair *a = *((const struct diff_filepair **)a_);
 156        const struct diff_filepair *b = *((const struct diff_filepair **)b_);
 157        const char *name_a, *name_b;
 158        int len_a, len_b, len;
 159        int cmp;
 160
 161        name_a = a->one ? a->one->path : a->two->path;
 162        name_b = b->one ? b->one->path : b->two->path;
 163
 164        len_a = strlen(name_a);
 165        len_b = strlen(name_b);
 166        len = (len_a < len_b) ? len_a : len_b;
 167
 168        /* strcmp will sort 'd' before 'd/e', we want 'd/e' before 'd' */
 169        cmp = memcmp(name_a, name_b, len);
 170        if (cmp)
 171                return cmp;
 172        cmp = len_b - len_a;
 173        if (cmp)
 174                return cmp;
 175        /*
 176         * Move 'R'ename entries last so that all references of the file
 177         * appear in the output before it is renamed (e.g., when a file
 178         * was copied and renamed in the same commit).
 179         */
 180        return (a->status == 'R') - (b->status == 'R');
 181}
 182
 183static void print_path(const char *path)
 184{
 185        int need_quote = quote_c_style(path, NULL, NULL, 0);
 186        if (need_quote)
 187                quote_c_style(path, NULL, stdout, 0);
 188        else
 189                printf("%s", path);
 190}
 191
 192static void show_filemodify(struct diff_queue_struct *q,
 193                            struct diff_options *options, void *data)
 194{
 195        int i;
 196
 197        /*
 198         * Handle files below a directory first, in case they are all deleted
 199         * and the directory changes to a file or symlink.
 200         */
 201        qsort(q->queue, q->nr, sizeof(q->queue[0]), depth_first);
 202
 203        for (i = 0; i < q->nr; i++) {
 204                struct diff_filespec *ospec = q->queue[i]->one;
 205                struct diff_filespec *spec = q->queue[i]->two;
 206
 207                switch (q->queue[i]->status) {
 208                case DIFF_STATUS_DELETED:
 209                        printf("D ");
 210                        print_path(spec->path);
 211                        putchar('\n');
 212                        break;
 213
 214                case DIFF_STATUS_COPIED:
 215                case DIFF_STATUS_RENAMED:
 216                        printf("%c ", q->queue[i]->status);
 217                        print_path(ospec->path);
 218                        putchar(' ');
 219                        print_path(spec->path);
 220                        putchar('\n');
 221
 222                        if (!hashcmp(ospec->sha1, spec->sha1) &&
 223                            ospec->mode == spec->mode)
 224                                break;
 225                        /* fallthrough */
 226
 227                case DIFF_STATUS_TYPE_CHANGED:
 228                case DIFF_STATUS_MODIFIED:
 229                case DIFF_STATUS_ADDED:
 230                        /*
 231                         * Links refer to objects in another repositories;
 232                         * output the SHA-1 verbatim.
 233                         */
 234                        if (no_data || S_ISGITLINK(spec->mode))
 235                                printf("M %06o %s ", spec->mode,
 236                                       sha1_to_hex(spec->sha1));
 237                        else {
 238                                struct object *object = lookup_object(spec->sha1);
 239                                printf("M %06o :%d ", spec->mode,
 240                                       get_object_mark(object));
 241                        }
 242                        print_path(spec->path);
 243                        putchar('\n');
 244                        break;
 245
 246                default:
 247                        die("Unexpected comparison status '%c' for %s, %s",
 248                                q->queue[i]->status,
 249                                ospec->path ? ospec->path : "none",
 250                                spec->path ? spec->path : "none");
 251                }
 252        }
 253}
 254
 255static const char *find_encoding(const char *begin, const char *end)
 256{
 257        const char *needle = "\nencoding ";
 258        char *bol, *eol;
 259
 260        bol = memmem(begin, end ? end - begin : strlen(begin),
 261                     needle, strlen(needle));
 262        if (!bol)
 263                return git_commit_encoding;
 264        bol += strlen(needle);
 265        eol = strchrnul(bol, '\n');
 266        *eol = '\0';
 267        return bol;
 268}
 269
 270static void handle_commit(struct commit *commit, struct rev_info *rev)
 271{
 272        int saved_output_format = rev->diffopt.output_format;
 273        const char *author, *author_end, *committer, *committer_end;
 274        const char *encoding, *message;
 275        char *reencoded = NULL;
 276        struct commit_list *p;
 277        int i;
 278
 279        rev->diffopt.output_format = DIFF_FORMAT_CALLBACK;
 280
 281        parse_commit(commit);
 282        author = strstr(commit->buffer, "\nauthor ");
 283        if (!author)
 284                die ("Could not find author in commit %s",
 285                     sha1_to_hex(commit->object.sha1));
 286        author++;
 287        author_end = strchrnul(author, '\n');
 288        committer = strstr(author_end, "\ncommitter ");
 289        if (!committer)
 290                die ("Could not find committer in commit %s",
 291                     sha1_to_hex(commit->object.sha1));
 292        committer++;
 293        committer_end = strchrnul(committer, '\n');
 294        message = strstr(committer_end, "\n\n");
 295        encoding = find_encoding(committer_end, message);
 296        if (message)
 297                message += 2;
 298
 299        if (commit->parents &&
 300            get_object_mark(&commit->parents->item->object) != 0 &&
 301            !full_tree) {
 302                parse_commit(commit->parents->item);
 303                diff_tree_sha1(commit->parents->item->tree->object.sha1,
 304                               commit->tree->object.sha1, "", &rev->diffopt);
 305        }
 306        else
 307                diff_root_tree_sha1(commit->tree->object.sha1,
 308                                    "", &rev->diffopt);
 309
 310        /* Export the referenced blobs, and remember the marks. */
 311        for (i = 0; i < diff_queued_diff.nr; i++)
 312                if (!S_ISGITLINK(diff_queued_diff.queue[i]->two->mode))
 313                        handle_object(diff_queued_diff.queue[i]->two->sha1);
 314
 315        mark_next_object(&commit->object);
 316        if (!is_encoding_utf8(encoding))
 317                reencoded = reencode_string(message, "UTF-8", encoding);
 318        if (!commit->parents)
 319                printf("reset %s\n", (const char*)commit->util);
 320        printf("commit %s\nmark :%"PRIu32"\n%.*s\n%.*s\ndata %u\n%s",
 321               (const char *)commit->util, last_idnum,
 322               (int)(author_end - author), author,
 323               (int)(committer_end - committer), committer,
 324               (unsigned)(reencoded
 325                          ? strlen(reencoded) : message
 326                          ? strlen(message) : 0),
 327               reencoded ? reencoded : message ? message : "");
 328        free(reencoded);
 329
 330        for (i = 0, p = commit->parents; p; p = p->next) {
 331                int mark = get_object_mark(&p->item->object);
 332                if (!mark)
 333                        continue;
 334                if (i == 0)
 335                        printf("from :%d\n", mark);
 336                else
 337                        printf("merge :%d\n", mark);
 338                i++;
 339        }
 340
 341        if (full_tree)
 342                printf("deleteall\n");
 343        log_tree_diff_flush(rev);
 344        rev->diffopt.output_format = saved_output_format;
 345
 346        printf("\n");
 347
 348        show_progress();
 349}
 350
 351static void handle_tail(struct object_array *commits, struct rev_info *revs)
 352{
 353        struct commit *commit;
 354        while (commits->nr) {
 355                commit = (struct commit *)commits->objects[commits->nr - 1].item;
 356                if (has_unshown_parent(commit))
 357                        return;
 358                handle_commit(commit, revs);
 359                commits->nr--;
 360        }
 361}
 362
 363static void handle_tag(const char *name, struct tag *tag)
 364{
 365        unsigned long size;
 366        enum object_type type;
 367        char *buf;
 368        const char *tagger, *tagger_end, *message;
 369        size_t message_size = 0;
 370        struct object *tagged;
 371        int tagged_mark;
 372        struct commit *p;
 373
 374        /* Trees have no identifer in fast-export output, thus we have no way
 375         * to output tags of trees, tags of tags of trees, etc.  Simply omit
 376         * such tags.
 377         */
 378        tagged = tag->tagged;
 379        while (tagged->type == OBJ_TAG) {
 380                tagged = ((struct tag *)tagged)->tagged;
 381        }
 382        if (tagged->type == OBJ_TREE) {
 383                warning("Omitting tag %s,\nsince tags of trees (or tags of tags of trees, etc.) are not supported.",
 384                        sha1_to_hex(tag->object.sha1));
 385                return;
 386        }
 387
 388        buf = read_sha1_file(tag->object.sha1, &type, &size);
 389        if (!buf)
 390                die ("Could not read tag %s", sha1_to_hex(tag->object.sha1));
 391        message = memmem(buf, size, "\n\n", 2);
 392        if (message) {
 393                message += 2;
 394                message_size = strlen(message);
 395        }
 396        tagger = memmem(buf, message ? message - buf : size, "\ntagger ", 8);
 397        if (!tagger) {
 398                if (fake_missing_tagger)
 399                        tagger = "tagger Unspecified Tagger "
 400                                "<unspecified-tagger> 0 +0000";
 401                else
 402                        tagger = "";
 403                tagger_end = tagger + strlen(tagger);
 404        } else {
 405                tagger++;
 406                tagger_end = strchrnul(tagger, '\n');
 407        }
 408
 409        /* handle signed tags */
 410        if (message) {
 411                const char *signature = strstr(message,
 412                                               "\n-----BEGIN PGP SIGNATURE-----\n");
 413                if (signature)
 414                        switch(signed_tag_mode) {
 415                        case ABORT:
 416                                die ("Encountered signed tag %s; use "
 417                                     "--signed-tag=<mode> to handle it.",
 418                                     sha1_to_hex(tag->object.sha1));
 419                        case WARN:
 420                                warning ("Exporting signed tag %s",
 421                                         sha1_to_hex(tag->object.sha1));
 422                                /* fallthru */
 423                        case VERBATIM:
 424                                break;
 425                        case STRIP:
 426                                message_size = signature + 1 - message;
 427                                break;
 428                        }
 429        }
 430
 431        /* handle tag->tagged having been filtered out due to paths specified */
 432        tagged = tag->tagged;
 433        tagged_mark = get_object_mark(tagged);
 434        if (!tagged_mark) {
 435                switch(tag_of_filtered_mode) {
 436                case ABORT:
 437                        die ("Tag %s tags unexported object; use "
 438                             "--tag-of-filtered-object=<mode> to handle it.",
 439                             sha1_to_hex(tag->object.sha1));
 440                case DROP:
 441                        /* Ignore this tag altogether */
 442                        return;
 443                case REWRITE:
 444                        if (tagged->type != OBJ_COMMIT) {
 445                                die ("Tag %s tags unexported %s!",
 446                                     sha1_to_hex(tag->object.sha1),
 447                                     typename(tagged->type));
 448                        }
 449                        p = (struct commit *)tagged;
 450                        for (;;) {
 451                                if (p->parents && p->parents->next)
 452                                        break;
 453                                if (p->object.flags & UNINTERESTING)
 454                                        break;
 455                                if (!(p->object.flags & TREESAME))
 456                                        break;
 457                                if (!p->parents)
 458                                        die ("Can't find replacement commit for tag %s\n",
 459                                             sha1_to_hex(tag->object.sha1));
 460                                p = p->parents->item;
 461                        }
 462                        tagged_mark = get_object_mark(&p->object);
 463                }
 464        }
 465
 466        if (!prefixcmp(name, "refs/tags/"))
 467                name += 10;
 468        printf("tag %s\nfrom :%d\n%.*s%sdata %d\n%.*s\n",
 469               name, tagged_mark,
 470               (int)(tagger_end - tagger), tagger,
 471               tagger == tagger_end ? "" : "\n",
 472               (int)message_size, (int)message_size, message ? message : "");
 473}
 474
 475static void get_tags_and_duplicates(struct object_array *pending,
 476                                    struct string_list *extra_refs)
 477{
 478        struct tag *tag;
 479        int i;
 480
 481        for (i = 0; i < pending->nr; i++) {
 482                struct object_array_entry *e = pending->objects + i;
 483                unsigned char sha1[20];
 484                struct commit *commit = commit;
 485                char *full_name;
 486
 487                if (dwim_ref(e->name, strlen(e->name), sha1, &full_name) != 1)
 488                        continue;
 489
 490                switch (e->item->type) {
 491                case OBJ_COMMIT:
 492                        commit = (struct commit *)e->item;
 493                        break;
 494                case OBJ_TAG:
 495                        tag = (struct tag *)e->item;
 496
 497                        /* handle nested tags */
 498                        while (tag && tag->object.type == OBJ_TAG) {
 499                                parse_object(tag->object.sha1);
 500                                string_list_append(extra_refs, full_name)->util = tag;
 501                                tag = (struct tag *)tag->tagged;
 502                        }
 503                        if (!tag)
 504                                die ("Tag %s points nowhere?", e->name);
 505                        switch(tag->object.type) {
 506                        case OBJ_COMMIT:
 507                                commit = (struct commit *)tag;
 508                                break;
 509                        case OBJ_BLOB:
 510                                handle_object(tag->object.sha1);
 511                                continue;
 512                        default: /* OBJ_TAG (nested tags) is already handled */
 513                                warning("Tag points to object of unexpected type %s, skipping.",
 514                                        typename(tag->object.type));
 515                                continue;
 516                        }
 517                        break;
 518                default:
 519                        warning("%s: Unexpected object of type %s, skipping.",
 520                                e->name,
 521                                typename(e->item->type));
 522                        continue;
 523                }
 524                if (commit->util)
 525                        /* more than one name for the same object */
 526                        string_list_append(extra_refs, full_name)->util = commit;
 527                else
 528                        commit->util = full_name;
 529        }
 530}
 531
 532static void handle_tags_and_duplicates(struct string_list *extra_refs)
 533{
 534        struct commit *commit;
 535        int i;
 536
 537        for (i = extra_refs->nr - 1; i >= 0; i--) {
 538                const char *name = extra_refs->items[i].string;
 539                struct object *object = extra_refs->items[i].util;
 540                switch (object->type) {
 541                case OBJ_TAG:
 542                        handle_tag(name, (struct tag *)object);
 543                        break;
 544                case OBJ_COMMIT:
 545                        /* create refs pointing to already seen commits */
 546                        commit = (struct commit *)object;
 547                        printf("reset %s\nfrom :%d\n\n", name,
 548                               get_object_mark(&commit->object));
 549                        show_progress();
 550                        break;
 551                }
 552        }
 553}
 554
 555static void export_marks(char *file)
 556{
 557        unsigned int i;
 558        uint32_t mark;
 559        struct object_decoration *deco = idnums.hash;
 560        FILE *f;
 561        int e = 0;
 562
 563        f = fopen(file, "w");
 564        if (!f)
 565                die_errno("Unable to open marks file %s for writing.", file);
 566
 567        for (i = 0; i < idnums.size; i++) {
 568                if (deco->base && deco->base->type == 1) {
 569                        mark = ptr_to_mark(deco->decoration);
 570                        if (fprintf(f, ":%"PRIu32" %s\n", mark,
 571                                sha1_to_hex(deco->base->sha1)) < 0) {
 572                            e = 1;
 573                            break;
 574                        }
 575                }
 576                deco++;
 577        }
 578
 579        e |= ferror(f);
 580        e |= fclose(f);
 581        if (e)
 582                error("Unable to write marks file %s.", file);
 583}
 584
 585static void import_marks(char *input_file)
 586{
 587        char line[512];
 588        FILE *f = fopen(input_file, "r");
 589        if (!f)
 590                die_errno("cannot read '%s'", input_file);
 591
 592        while (fgets(line, sizeof(line), f)) {
 593                uint32_t mark;
 594                char *line_end, *mark_end;
 595                unsigned char sha1[20];
 596                struct object *object;
 597
 598                line_end = strchr(line, '\n');
 599                if (line[0] != ':' || !line_end)
 600                        die("corrupt mark line: %s", line);
 601                *line_end = '\0';
 602
 603                mark = strtoumax(line + 1, &mark_end, 10);
 604                if (!mark || mark_end == line + 1
 605                        || *mark_end != ' ' || get_sha1(mark_end + 1, sha1))
 606                        die("corrupt mark line: %s", line);
 607
 608                object = parse_object(sha1);
 609                if (!object)
 610                        die ("Could not read blob %s", sha1_to_hex(sha1));
 611
 612                if (object->flags & SHOWN)
 613                        error("Object %s already has a mark", sha1);
 614
 615                mark_object(object, mark);
 616                if (last_idnum < mark)
 617                        last_idnum = mark;
 618
 619                object->flags |= SHOWN;
 620        }
 621        fclose(f);
 622}
 623
/*
 * Entry point of "git fast-export".
 *
 * Parses revision arguments and fast-export options, optionally imports
 * marks, walks the selected commits and writes them to stdout, then
 * emits tags and additional ref names and optionally exports marks.
 * Returns 0; fatal conditions go through die() or usage_with_options().
 */
int cmd_fast_export(int argc, const char **argv, const char *prefix)
{
        struct rev_info revs;
        /* Commits deferred because a parent had not been shown yet. */
        struct object_array commits = OBJECT_ARRAY_INIT;
        /* Tags and additional ref names, handled after the commit walk. */
        struct string_list extra_refs = STRING_LIST_INIT_NODUP;
        struct commit *commit;
        char *export_filename = NULL, *import_filename = NULL;
        struct option options[] = {
                OPT_INTEGER(0, "progress", &progress,
                            "show progress after <n> objects"),
                OPT_CALLBACK(0, "signed-tags", &signed_tag_mode, "mode",
                             "select handling of signed tags",
                             parse_opt_signed_tag_mode),
                OPT_CALLBACK(0, "tag-of-filtered-object", &tag_of_filtered_mode, "mode",
                             "select handling of tags that tag filtered objects",
                             parse_opt_tag_of_filtered_mode),
                OPT_STRING(0, "export-marks", &export_filename, "file",
                             "Dump marks to this file"),
                OPT_STRING(0, "import-marks", &import_filename, "file",
                             "Import marks from this file"),
                OPT_BOOLEAN(0, "fake-missing-tagger", &fake_missing_tagger,
                             "Fake a tagger when tags lack one"),
                OPT_BOOLEAN(0, "full-tree", &full_tree,
                             "Output full tree for each commit"),
                OPT_BOOLEAN(0, "use-done-feature", &use_done_feature,
                             "Use the done feature to terminate the stream"),
                OPT_BOOL(0, "no-data", &no_data, "Skip output of blob data"),
                OPT_END()
        };

        /* No arguments at all: show usage. */
        if (argc == 1)
                usage_with_options (fast_export_usage, options);

        /* we handle encodings */
        git_config(git_default_config, NULL);

        init_revisions(&revs, prefix);
        revs.topo_order = 1;
        revs.show_source = 1;
        revs.rewrite_parents = 1;
        /*
         * setup_revisions() runs first; whatever it leaves in argv is
         * then parsed as fast-export options.  Anything still left over
         * after that is a usage error.
         */
        argc = setup_revisions(argc, argv, &revs, NULL);
        argc = parse_options(argc, argv, prefix, options, fast_export_usage, 0);
        if (argc > 1)
                usage_with_options (fast_export_usage, options);

        if (use_done_feature)
                printf("feature done\n");

        if (import_filename)
                import_marks(import_filename);

        /* Imported marks combined with path limiting force full-tree output. */
        if (import_filename && revs.prune_data.nr)
                full_tree = 1;

        get_tags_and_duplicates(&revs.pending, &extra_refs);

        if (prepare_revision_walk(&revs))
                die("revision walk setup failed");
        revs.diffopt.format_callback = show_filemodify;
        DIFF_OPT_SET(&revs.diffopt, RECURSIVE);
        /*
         * A commit with an unshown parent is deferred; after each commit
         * that is exported, deferred commits are retried via handle_tail().
         */
        while ((commit = get_revision(&revs))) {
                if (has_unshown_parent(commit)) {
                        add_object_array(&commit->object, NULL, &commits);
                }
                else {
                        handle_commit(commit, &revs);
                        handle_tail(&commits, &revs);
                }
        }

        handle_tags_and_duplicates(&extra_refs);

        if (export_filename)
                export_marks(export_filename);

        if (use_done_feature)
                printf("done\n");

        return 0;
}
 703}