/* builtin-fast-export.c on commit gitweb: parse_commit_text encoding fix (5ed5bbc) */
   1/*
   2 * "git fast-export" builtin command
   3 *
   4 * Copyright (C) 2007 Johannes E. Schindelin
   5 */
   6#include "builtin.h"
   7#include "cache.h"
   8#include "commit.h"
   9#include "object.h"
  10#include "tag.h"
  11#include "diff.h"
  12#include "diffcore.h"
  13#include "log-tree.h"
  14#include "revision.h"
  15#include "decorate.h"
  16#include "string-list.h"
  17#include "utf8.h"
  18#include "parse-options.h"
  19
  20static const char *fast_export_usage[] = {
  21        "git fast-export [rev-list-opts]",
  22        NULL
  23};
  24
  25static int progress;
  26static enum { ABORT, VERBATIM, WARN, STRIP } signed_tag_mode = ABORT;
  27static enum { ERROR, DROP, REWRITE } tag_of_filtered_mode = ABORT;
  28static int fake_missing_tagger;
  29
  30static int parse_opt_signed_tag_mode(const struct option *opt,
  31                                     const char *arg, int unset)
  32{
  33        if (unset || !strcmp(arg, "abort"))
  34                signed_tag_mode = ABORT;
  35        else if (!strcmp(arg, "verbatim") || !strcmp(arg, "ignore"))
  36                signed_tag_mode = VERBATIM;
  37        else if (!strcmp(arg, "warn"))
  38                signed_tag_mode = WARN;
  39        else if (!strcmp(arg, "strip"))
  40                signed_tag_mode = STRIP;
  41        else
  42                return error("Unknown signed-tag mode: %s", arg);
  43        return 0;
  44}
  45
  46static int parse_opt_tag_of_filtered_mode(const struct option *opt,
  47                                          const char *arg, int unset)
  48{
  49        if (unset || !strcmp(arg, "abort"))
  50                tag_of_filtered_mode = ABORT;
  51        else if (!strcmp(arg, "drop"))
  52                tag_of_filtered_mode = DROP;
  53        else if (!strcmp(arg, "rewrite"))
  54                tag_of_filtered_mode = REWRITE;
  55        else
  56                return error("Unknown tag-of-filtered mode: %s", arg);
  57        return 0;
  58}
  59
  60static struct decoration idnums;
  61static uint32_t last_idnum;
  62
  63static int has_unshown_parent(struct commit *commit)
  64{
  65        struct commit_list *parent;
  66
  67        for (parent = commit->parents; parent; parent = parent->next)
  68                if (!(parent->item->object.flags & SHOWN) &&
  69                    !(parent->item->object.flags & UNINTERESTING))
  70                        return 1;
  71        return 0;
  72}
  73
  74/* Since intptr_t is C99, we do not use it here */
  75static inline uint32_t *mark_to_ptr(uint32_t mark)
  76{
  77        return ((uint32_t *)NULL) + mark;
  78}
  79
  80static inline uint32_t ptr_to_mark(void * mark)
  81{
  82        return (uint32_t *)mark - (uint32_t *)NULL;
  83}
  84
  85static inline void mark_object(struct object *object, uint32_t mark)
  86{
  87        add_decoration(&idnums, object, mark_to_ptr(mark));
  88}
  89
  90static inline void mark_next_object(struct object *object)
  91{
  92        mark_object(object, ++last_idnum);
  93}
  94
  95static int get_object_mark(struct object *object)
  96{
  97        void *decoration = lookup_decoration(&idnums, object);
  98        if (!decoration)
  99                return 0;
 100        return ptr_to_mark(decoration);
 101}
 102
 103static void show_progress(void)
 104{
 105        static int counter = 0;
 106        if (!progress)
 107                return;
 108        if ((++counter % progress) == 0)
 109                printf("progress %d objects\n", counter);
 110}
 111
 112static void handle_object(const unsigned char *sha1)
 113{
 114        unsigned long size;
 115        enum object_type type;
 116        char *buf;
 117        struct object *object;
 118
 119        if (is_null_sha1(sha1))
 120                return;
 121
 122        object = parse_object(sha1);
 123        if (!object)
 124                die ("Could not read blob %s", sha1_to_hex(sha1));
 125
 126        if (object->flags & SHOWN)
 127                return;
 128
 129        buf = read_sha1_file(sha1, &type, &size);
 130        if (!buf)
 131                die ("Could not read blob %s", sha1_to_hex(sha1));
 132
 133        mark_next_object(object);
 134
 135        printf("blob\nmark :%"PRIu32"\ndata %lu\n", last_idnum, size);
 136        if (size && fwrite(buf, size, 1, stdout) != 1)
 137                die_errno ("Could not write blob '%s'", sha1_to_hex(sha1));
 138        printf("\n");
 139
 140        show_progress();
 141
 142        object->flags |= SHOWN;
 143        free(buf);
 144}
 145
 146static void show_filemodify(struct diff_queue_struct *q,
 147                            struct diff_options *options, void *data)
 148{
 149        int i;
 150        for (i = 0; i < q->nr; i++) {
 151                struct diff_filespec *ospec = q->queue[i]->one;
 152                struct diff_filespec *spec = q->queue[i]->two;
 153
 154                switch (q->queue[i]->status) {
 155                case DIFF_STATUS_DELETED:
 156                        printf("D %s\n", spec->path);
 157                        break;
 158
 159                case DIFF_STATUS_COPIED:
 160                case DIFF_STATUS_RENAMED:
 161                        printf("%c \"%s\" \"%s\"\n", q->queue[i]->status,
 162                               ospec->path, spec->path);
 163
 164                        if (!hashcmp(ospec->sha1, spec->sha1) &&
 165                            ospec->mode == spec->mode)
 166                                break;
 167                        /* fallthrough */
 168
 169                case DIFF_STATUS_TYPE_CHANGED:
 170                case DIFF_STATUS_MODIFIED:
 171                case DIFF_STATUS_ADDED:
 172                        /*
 173                         * Links refer to objects in another repositories;
 174                         * output the SHA-1 verbatim.
 175                         */
 176                        if (S_ISGITLINK(spec->mode))
 177                                printf("M %06o %s %s\n", spec->mode,
 178                                       sha1_to_hex(spec->sha1), spec->path);
 179                        else {
 180                                struct object *object = lookup_object(spec->sha1);
 181                                printf("M %06o :%d %s\n", spec->mode,
 182                                       get_object_mark(object), spec->path);
 183                        }
 184                        break;
 185
 186                default:
 187                        die("Unexpected comparison status '%c' for %s, %s",
 188                                q->queue[i]->status,
 189                                ospec->path ? ospec->path : "none",
 190                                spec->path ? spec->path : "none");
 191                }
 192        }
 193}
 194
 195static const char *find_encoding(const char *begin, const char *end)
 196{
 197        const char *needle = "\nencoding ";
 198        char *bol, *eol;
 199
 200        bol = memmem(begin, end ? end - begin : strlen(begin),
 201                     needle, strlen(needle));
 202        if (!bol)
 203                return git_commit_encoding;
 204        bol += strlen(needle);
 205        eol = strchrnul(bol, '\n');
 206        *eol = '\0';
 207        return bol;
 208}
 209
 210static void handle_commit(struct commit *commit, struct rev_info *rev)
 211{
 212        int saved_output_format = rev->diffopt.output_format;
 213        const char *author, *author_end, *committer, *committer_end;
 214        const char *encoding, *message;
 215        char *reencoded = NULL;
 216        struct commit_list *p;
 217        int i;
 218
 219        rev->diffopt.output_format = DIFF_FORMAT_CALLBACK;
 220
 221        parse_commit(commit);
 222        author = strstr(commit->buffer, "\nauthor ");
 223        if (!author)
 224                die ("Could not find author in commit %s",
 225                     sha1_to_hex(commit->object.sha1));
 226        author++;
 227        author_end = strchrnul(author, '\n');
 228        committer = strstr(author_end, "\ncommitter ");
 229        if (!committer)
 230                die ("Could not find committer in commit %s",
 231                     sha1_to_hex(commit->object.sha1));
 232        committer++;
 233        committer_end = strchrnul(committer, '\n');
 234        message = strstr(committer_end, "\n\n");
 235        encoding = find_encoding(committer_end, message);
 236        if (message)
 237                message += 2;
 238
 239        if (commit->parents &&
 240            get_object_mark(&commit->parents->item->object) != 0) {
 241                parse_commit(commit->parents->item);
 242                diff_tree_sha1(commit->parents->item->tree->object.sha1,
 243                               commit->tree->object.sha1, "", &rev->diffopt);
 244        }
 245        else
 246                diff_root_tree_sha1(commit->tree->object.sha1,
 247                                    "", &rev->diffopt);
 248
 249        /* Export the referenced blobs, and remember the marks. */
 250        for (i = 0; i < diff_queued_diff.nr; i++)
 251                if (!S_ISGITLINK(diff_queued_diff.queue[i]->two->mode))
 252                        handle_object(diff_queued_diff.queue[i]->two->sha1);
 253
 254        mark_next_object(&commit->object);
 255        if (!is_encoding_utf8(encoding))
 256                reencoded = reencode_string(message, "UTF-8", encoding);
 257        if (!commit->parents)
 258                printf("reset %s\n", (const char*)commit->util);
 259        printf("commit %s\nmark :%"PRIu32"\n%.*s\n%.*s\ndata %u\n%s",
 260               (const char *)commit->util, last_idnum,
 261               (int)(author_end - author), author,
 262               (int)(committer_end - committer), committer,
 263               (unsigned)(reencoded
 264                          ? strlen(reencoded) : message
 265                          ? strlen(message) : 0),
 266               reencoded ? reencoded : message ? message : "");
 267        free(reencoded);
 268
 269        for (i = 0, p = commit->parents; p; p = p->next) {
 270                int mark = get_object_mark(&p->item->object);
 271                if (!mark)
 272                        continue;
 273                if (i == 0)
 274                        printf("from :%d\n", mark);
 275                else
 276                        printf("merge :%d\n", mark);
 277                i++;
 278        }
 279
 280        log_tree_diff_flush(rev);
 281        rev->diffopt.output_format = saved_output_format;
 282
 283        printf("\n");
 284
 285        show_progress();
 286}
 287
 288static void handle_tail(struct object_array *commits, struct rev_info *revs)
 289{
 290        struct commit *commit;
 291        while (commits->nr) {
 292                commit = (struct commit *)commits->objects[commits->nr - 1].item;
 293                if (has_unshown_parent(commit))
 294                        return;
 295                handle_commit(commit, revs);
 296                commits->nr--;
 297        }
 298}
 299
 300static void handle_tag(const char *name, struct tag *tag)
 301{
 302        unsigned long size;
 303        enum object_type type;
 304        char *buf;
 305        const char *tagger, *tagger_end, *message;
 306        size_t message_size = 0;
 307        struct object *tagged;
 308        int tagged_mark;
 309        struct commit *p;
 310
 311        /* Trees have no identifer in fast-export output, thus we have no way
 312         * to output tags of trees, tags of tags of trees, etc.  Simply omit
 313         * such tags.
 314         */
 315        tagged = tag->tagged;
 316        while (tagged->type == OBJ_TAG) {
 317                tagged = ((struct tag *)tagged)->tagged;
 318        }
 319        if (tagged->type == OBJ_TREE) {
 320                warning("Omitting tag %s,\nsince tags of trees (or tags of tags of trees, etc.) are not supported.",
 321                        sha1_to_hex(tag->object.sha1));
 322                return;
 323        }
 324
 325        buf = read_sha1_file(tag->object.sha1, &type, &size);
 326        if (!buf)
 327                die ("Could not read tag %s", sha1_to_hex(tag->object.sha1));
 328        message = memmem(buf, size, "\n\n", 2);
 329        if (message) {
 330                message += 2;
 331                message_size = strlen(message);
 332        }
 333        tagger = memmem(buf, message ? message - buf : size, "\ntagger ", 8);
 334        if (!tagger) {
 335                if (fake_missing_tagger)
 336                        tagger = "tagger Unspecified Tagger "
 337                                "<unspecified-tagger> 0 +0000";
 338                else
 339                        tagger = "";
 340                tagger_end = tagger + strlen(tagger);
 341        } else {
 342                tagger++;
 343                tagger_end = strchrnul(tagger, '\n');
 344        }
 345
 346        /* handle signed tags */
 347        if (message) {
 348                const char *signature = strstr(message,
 349                                               "\n-----BEGIN PGP SIGNATURE-----\n");
 350                if (signature)
 351                        switch(signed_tag_mode) {
 352                        case ABORT:
 353                                die ("Encountered signed tag %s; use "
 354                                     "--signed-tag=<mode> to handle it.",
 355                                     sha1_to_hex(tag->object.sha1));
 356                        case WARN:
 357                                warning ("Exporting signed tag %s",
 358                                         sha1_to_hex(tag->object.sha1));
 359                                /* fallthru */
 360                        case VERBATIM:
 361                                break;
 362                        case STRIP:
 363                                message_size = signature + 1 - message;
 364                                break;
 365                        }
 366        }
 367
 368        /* handle tag->tagged having been filtered out due to paths specified */
 369        tagged = tag->tagged;
 370        tagged_mark = get_object_mark(tagged);
 371        if (!tagged_mark) {
 372                switch(tag_of_filtered_mode) {
 373                case ABORT:
 374                        die ("Tag %s tags unexported object; use "
 375                             "--tag-of-filtered-object=<mode> to handle it.",
 376                             sha1_to_hex(tag->object.sha1));
 377                case DROP:
 378                        /* Ignore this tag altogether */
 379                        return;
 380                case REWRITE:
 381                        if (tagged->type != OBJ_COMMIT) {
 382                                die ("Tag %s tags unexported %s!",
 383                                     sha1_to_hex(tag->object.sha1),
 384                                     typename(tagged->type));
 385                        }
 386                        p = (struct commit *)tagged;
 387                        for (;;) {
 388                                if (p->parents && p->parents->next)
 389                                        break;
 390                                if (p->object.flags & UNINTERESTING)
 391                                        break;
 392                                if (!(p->object.flags & TREESAME))
 393                                        break;
 394                                if (!p->parents)
 395                                        die ("Can't find replacement commit for tag %s\n",
 396                                             sha1_to_hex(tag->object.sha1));
 397                                p = p->parents->item;
 398                        }
 399                        tagged_mark = get_object_mark(&p->object);
 400                }
 401        }
 402
 403        if (!prefixcmp(name, "refs/tags/"))
 404                name += 10;
 405        printf("tag %s\nfrom :%d\n%.*s%sdata %d\n%.*s\n",
 406               name, tagged_mark,
 407               (int)(tagger_end - tagger), tagger,
 408               tagger == tagger_end ? "" : "\n",
 409               (int)message_size, (int)message_size, message ? message : "");
 410}
 411
 412static void get_tags_and_duplicates(struct object_array *pending,
 413                                    struct string_list *extra_refs)
 414{
 415        struct tag *tag;
 416        int i;
 417
 418        for (i = 0; i < pending->nr; i++) {
 419                struct object_array_entry *e = pending->objects + i;
 420                unsigned char sha1[20];
 421                struct commit *commit = commit;
 422                char *full_name;
 423
 424                if (dwim_ref(e->name, strlen(e->name), sha1, &full_name) != 1)
 425                        continue;
 426
 427                switch (e->item->type) {
 428                case OBJ_COMMIT:
 429                        commit = (struct commit *)e->item;
 430                        break;
 431                case OBJ_TAG:
 432                        tag = (struct tag *)e->item;
 433
 434                        /* handle nested tags */
 435                        while (tag && tag->object.type == OBJ_TAG) {
 436                                parse_object(tag->object.sha1);
 437                                string_list_append(full_name, extra_refs)->util = tag;
 438                                tag = (struct tag *)tag->tagged;
 439                        }
 440                        if (!tag)
 441                                die ("Tag %s points nowhere?", e->name);
 442                        switch(tag->object.type) {
 443                        case OBJ_COMMIT:
 444                                commit = (struct commit *)tag;
 445                                break;
 446                        case OBJ_BLOB:
 447                                handle_object(tag->object.sha1);
 448                                continue;
 449                        default: /* OBJ_TAG (nested tags) is already handled */
 450                                warning("Tag points to object of unexpected type %s, skipping.",
 451                                        typename(tag->object.type));
 452                                continue;
 453                        }
 454                        break;
 455                default:
 456                        warning("%s: Unexpected object of type %s, skipping.",
 457                                e->name,
 458                                typename(e->item->type));
 459                        continue;
 460                }
 461                if (commit->util)
 462                        /* more than one name for the same object */
 463                        string_list_append(full_name, extra_refs)->util = commit;
 464                else
 465                        commit->util = full_name;
 466        }
 467}
 468
 469static void handle_tags_and_duplicates(struct string_list *extra_refs)
 470{
 471        struct commit *commit;
 472        int i;
 473
 474        for (i = extra_refs->nr - 1; i >= 0; i--) {
 475                const char *name = extra_refs->items[i].string;
 476                struct object *object = extra_refs->items[i].util;
 477                switch (object->type) {
 478                case OBJ_TAG:
 479                        handle_tag(name, (struct tag *)object);
 480                        break;
 481                case OBJ_COMMIT:
 482                        /* create refs pointing to already seen commits */
 483                        commit = (struct commit *)object;
 484                        printf("reset %s\nfrom :%d\n\n", name,
 485                               get_object_mark(&commit->object));
 486                        show_progress();
 487                        break;
 488                }
 489        }
 490}
 491
 492static void export_marks(char *file)
 493{
 494        unsigned int i;
 495        uint32_t mark;
 496        struct object_decoration *deco = idnums.hash;
 497        FILE *f;
 498        int e = 0;
 499
 500        f = fopen(file, "w");
 501        if (!f)
 502                error("Unable to open marks file %s for writing.", file);
 503
 504        for (i = 0; i < idnums.size; i++) {
 505                if (deco->base && deco->base->type == 1) {
 506                        mark = ptr_to_mark(deco->decoration);
 507                        if (fprintf(f, ":%"PRIu32" %s\n", mark,
 508                                sha1_to_hex(deco->base->sha1)) < 0) {
 509                            e = 1;
 510                            break;
 511                        }
 512                }
 513                deco++;
 514        }
 515
 516        e |= ferror(f);
 517        e |= fclose(f);
 518        if (e)
 519                error("Unable to write marks file %s.", file);
 520}
 521
 522static void import_marks(char *input_file)
 523{
 524        char line[512];
 525        FILE *f = fopen(input_file, "r");
 526        if (!f)
 527                die_errno("cannot read '%s'", input_file);
 528
 529        while (fgets(line, sizeof(line), f)) {
 530                uint32_t mark;
 531                char *line_end, *mark_end;
 532                unsigned char sha1[20];
 533                struct object *object;
 534
 535                line_end = strchr(line, '\n');
 536                if (line[0] != ':' || !line_end)
 537                        die("corrupt mark line: %s", line);
 538                *line_end = '\0';
 539
 540                mark = strtoumax(line + 1, &mark_end, 10);
 541                if (!mark || mark_end == line + 1
 542                        || *mark_end != ' ' || get_sha1(mark_end + 1, sha1))
 543                        die("corrupt mark line: %s", line);
 544
 545                object = parse_object(sha1);
 546                if (!object)
 547                        die ("Could not read blob %s", sha1_to_hex(sha1));
 548
 549                if (object->flags & SHOWN)
 550                        error("Object %s already has a mark", sha1);
 551
 552                mark_object(object, mark);
 553                if (last_idnum < mark)
 554                        last_idnum = mark;
 555
 556                object->flags |= SHOWN;
 557        }
 558        fclose(f);
 559}
 560
 561int cmd_fast_export(int argc, const char **argv, const char *prefix)
 562{
 563        struct rev_info revs;
 564        struct object_array commits = { 0, 0, NULL };
 565        struct string_list extra_refs = { NULL, 0, 0, 0 };
 566        struct commit *commit;
 567        char *export_filename = NULL, *import_filename = NULL;
 568        struct option options[] = {
 569                OPT_INTEGER(0, "progress", &progress,
 570                            "show progress after <n> objects"),
 571                OPT_CALLBACK(0, "signed-tags", &signed_tag_mode, "mode",
 572                             "select handling of signed tags",
 573                             parse_opt_signed_tag_mode),
 574                OPT_CALLBACK(0, "tag-of-filtered-object", &tag_of_filtered_mode, "mode",
 575                             "select handling of tags that tag filtered objects",
 576                             parse_opt_tag_of_filtered_mode),
 577                OPT_STRING(0, "export-marks", &export_filename, "FILE",
 578                             "Dump marks to this file"),
 579                OPT_STRING(0, "import-marks", &import_filename, "FILE",
 580                             "Import marks from this file"),
 581                OPT_BOOLEAN(0, "fake-missing-tagger", &fake_missing_tagger,
 582                             "Fake a tagger when tags lack one"),
 583                OPT_END()
 584        };
 585
 586        if (argc == 1)
 587                usage_with_options (fast_export_usage, options);
 588
 589        /* we handle encodings */
 590        git_config(git_default_config, NULL);
 591
 592        init_revisions(&revs, prefix);
 593        revs.topo_order = 1;
 594        revs.show_source = 1;
 595        revs.rewrite_parents = 1;
 596        argc = setup_revisions(argc, argv, &revs, NULL);
 597        argc = parse_options(argc, argv, prefix, options, fast_export_usage, 0);
 598        if (argc > 1)
 599                usage_with_options (fast_export_usage, options);
 600
 601        if (import_filename)
 602                import_marks(import_filename);
 603
 604        get_tags_and_duplicates(&revs.pending, &extra_refs);
 605
 606        if (prepare_revision_walk(&revs))
 607                die("revision walk setup failed");
 608        revs.diffopt.format_callback = show_filemodify;
 609        DIFF_OPT_SET(&revs.diffopt, RECURSIVE);
 610        while ((commit = get_revision(&revs))) {
 611                if (has_unshown_parent(commit)) {
 612                        add_object_array(&commit->object, NULL, &commits);
 613                }
 614                else {
 615                        handle_commit(commit, &revs);
 616                        handle_tail(&commits, &revs);
 617                }
 618        }
 619
 620        handle_tags_and_duplicates(&extra_refs);
 621
 622        if (export_filename)
 623                export_marks(export_filename);
 624
 625        return 0;
 626}