Merge branch 'en/fast-export-import'
author Junio C Hamano <gitster@pobox.com>
Fri, 4 Jan 2019 21:33:33 +0000 (13:33 -0800)
committer Junio C Hamano <gitster@pobox.com>
Fri, 4 Jan 2019 21:33:33 +0000 (13:33 -0800)
Small fixes and features for fast-export and fast-import, mostly on
the fast-export side.

* en/fast-export-import:
fast-export: add a --show-original-ids option to show original names
fast-import: remove unmaintained duplicate documentation
fast-export: add --reference-excluded-parents option
fast-export: ensure we export requested refs
fast-export: when using paths, avoid corrupt stream with non-existent mark
fast-export: move commit rewriting logic into a function for reuse
fast-export: avoid dying when filtering by paths and old tags exist
fast-export: use value from correct enum
git-fast-export.txt: clarify misleading documentation about rev-list args
git-fast-import.txt: fix documentation for --quiet option
fast-export: convert sha1 to oid

1  2 
builtin/fast-export.c
fast-import.c
diff --combined builtin/fast-export.c
index 5790f0d554b0aed2ea36a38c289cc6ce2c3c0faf,36c2575de5251c38b29173af9362f52dd161191d..9e283482efcfa6de0376cc9306061cea149b12df
@@@ -31,13 -31,16 +31,16 @@@ static const char *fast_export_usage[] 
  };
  
  static int progress;
- static enum { ABORT, VERBATIM, WARN, WARN_STRIP, STRIP } signed_tag_mode = ABORT;
- static enum { ERROR, DROP, REWRITE } tag_of_filtered_mode = ERROR;
+ static enum { SIGNED_TAG_ABORT, VERBATIM, WARN, WARN_STRIP, STRIP } signed_tag_mode = SIGNED_TAG_ABORT;
+ static enum { TAG_FILTERING_ABORT, DROP, REWRITE } tag_of_filtered_mode = TAG_FILTERING_ABORT;
  static int fake_missing_tagger;
  static int use_done_feature;
  static int no_data;
  static int full_tree;
+ static int reference_excluded_commits;
+ static int show_original_ids;
  static struct string_list extra_refs = STRING_LIST_INIT_NODUP;
+ static struct string_list tag_refs = STRING_LIST_INIT_NODUP;
  static struct refspec refspecs = REFSPEC_INIT_FETCH;
  static int anonymize;
  static struct revision_sources revision_sources;
@@@ -46,7 -49,7 +49,7 @@@ static int parse_opt_signed_tag_mode(co
                                     const char *arg, int unset)
  {
        if (unset || !strcmp(arg, "abort"))
-               signed_tag_mode = ABORT;
+               signed_tag_mode = SIGNED_TAG_ABORT;
        else if (!strcmp(arg, "verbatim") || !strcmp(arg, "ignore"))
                signed_tag_mode = VERBATIM;
        else if (!strcmp(arg, "warn"))
@@@ -64,7 -67,7 +67,7 @@@ static int parse_opt_tag_of_filtered_mo
                                          const char *arg, int unset)
  {
        if (unset || !strcmp(arg, "abort"))
-               tag_of_filtered_mode = ERROR;
+               tag_of_filtered_mode = TAG_FILTERING_ABORT;
        else if (!strcmp(arg, "drop"))
                tag_of_filtered_mode = DROP;
        else if (!strcmp(arg, "rewrite"))
@@@ -187,6 -190,22 +190,22 @@@ static int get_object_mark(struct objec
        return ptr_to_mark(decoration);
  }
  
+ static struct commit *rewrite_commit(struct commit *p)
+ {
+       for (;;) {
+               if (p->parents && p->parents->next)
+                       break;
+               if (p->object.flags & UNINTERESTING)
+                       break;
+               if (!(p->object.flags & TREESAME))
+                       break;
+               if (!p->parents)
+                       return NULL;
+               p = p->parents->item;
+       }
+       return p;
+ }
  static void show_progress(void)
  {
        static int counter = 0;
@@@ -243,7 -262,7 +262,7 @@@ static void export_blob(const struct ob
                if (!buf)
                        die("could not read blob %s", oid_to_hex(oid));
                if (check_object_signature(oid, buf, size, type_name(type)) < 0)
-                       die("sha1 mismatch in blob %s", oid_to_hex(oid));
+                       die("oid mismatch in blob %s", oid_to_hex(oid));
                object = parse_object_buffer(the_repository, oid, type,
                                             size, buf, &eaten);
        }
  
        mark_next_object(object);
  
-       printf("blob\nmark :%"PRIu32"\ndata %"PRIuMAX"\n", last_idnum, (uintmax_t)size);
+       printf("blob\nmark :%"PRIu32"\n", last_idnum);
+       if (show_original_ids)
+               printf("original-oid %s\n", oid_to_hex(oid));
 -      printf("data %lu\n", size);
++      printf("data %"PRIuMAX"\n", (uintmax_t)size);
        if (size && fwrite(buf, size, 1, stdout) != 1)
                die_errno("could not write blob '%s'", oid_to_hex(oid));
        printf("\n");
@@@ -330,17 -352,18 +352,18 @@@ static void print_path(const char *path
  
  static void *generate_fake_oid(const void *old, size_t *len)
  {
-       static uint32_t counter = 1; /* avoid null sha1 */
-       unsigned char *out = xcalloc(GIT_SHA1_RAWSZ, 1);
-       put_be32(out + GIT_SHA1_RAWSZ - 4, counter++);
+       static uint32_t counter = 1; /* avoid null oid */
+       const unsigned hashsz = the_hash_algo->rawsz;
+       unsigned char *out = xcalloc(hashsz, 1);
+       put_be32(out + hashsz - 4, counter++);
        return out;
  }
  
- static const unsigned char *anonymize_sha1(const struct object_id *oid)
+ static const struct object_id *anonymize_oid(const struct object_id *oid)
  {
-       static struct hashmap sha1s;
-       size_t len = GIT_SHA1_RAWSZ;
-       return anonymize_mem(&sha1s, generate_fake_oid, oid, &len);
+       static struct hashmap objs;
+       size_t len = the_hash_algo->rawsz;
+       return anonymize_mem(&objs, generate_fake_oid, oid, &len);
  }
  
  static void show_filemodify(struct diff_queue_struct *q,
                         */
                        if (no_data || S_ISGITLINK(spec->mode))
                                printf("M %06o %s ", spec->mode,
-                                      sha1_to_hex(anonymize ?
-                                                  anonymize_sha1(&spec->oid) :
-                                                  spec->oid.hash));
+                                      oid_to_hex(anonymize ?
+                                                 anonymize_oid(&spec->oid) :
+                                                 &spec->oid));
                        else {
                                struct object *object = lookup_object(the_repository,
                                                                      spec->oid.hash);
@@@ -579,7 -602,8 +602,8 @@@ static void handle_commit(struct commi
                message += 2;
  
        if (commit->parents &&
-           get_object_mark(&commit->parents->item->object) != 0 &&
+           (get_object_mark(&commit->parents->item->object) != 0 ||
+            reference_excluded_commits) &&
            !full_tree) {
                parse_commit_or_die(commit->parents->item);
                diff_tree_oid(get_commit_tree_oid(commit->parents->item),
                        export_blob(&diff_queued_diff.queue[i]->two->oid);
  
        refname = *revision_sources_at(&revision_sources, commit);
+       /*
+        * FIXME: string_list_remove() below for each ref is overall
+        * O(N^2).  Compared to a history walk and diffing trees, this is
+        * just lost in the noise in practice.  However, theoretically a
+        * repo may have enough refs for this to become slow.
+        */
+       string_list_remove(&extra_refs, refname, 0);
        if (anonymize) {
                refname = anonymize_refname(refname);
                anonymize_ident_line(&committer, &committer_end);
                reencoded = reencode_string(message, "UTF-8", encoding);
        if (!commit->parents)
                printf("reset %s\n", refname);
-       printf("commit %s\nmark :%"PRIu32"\n%.*s\n%.*s\ndata %u\n%s",
-              refname, last_idnum,
+       printf("commit %s\nmark :%"PRIu32"\n", refname, last_idnum);
+       if (show_original_ids)
+               printf("original-oid %s\n", oid_to_hex(&commit->object.oid));
+       printf("%.*s\n%.*s\ndata %u\n%s",
               (int)(author_end - author), author,
               (int)(committer_end - committer), committer,
               (unsigned)(reencoded
        unuse_commit_buffer(commit, commit_buffer);
  
        for (i = 0, p = commit->parents; p; p = p->next) {
-               int mark = get_object_mark(&p->item->object);
-               if (!mark)
+               struct object *obj = &p->item->object;
+               int mark = get_object_mark(obj);
+               if (!mark && !reference_excluded_commits)
                        continue;
                if (i == 0)
-                       printf("from :%d\n", mark);
+                       printf("from ");
+               else
+                       printf("merge ");
+               if (mark)
+                       printf(":%d\n", mark);
                else
-                       printf("merge :%d\n", mark);
+                       printf("%s\n", oid_to_hex(anonymize ?
+                                                 anonymize_oid(&obj->oid) :
+                                                 &obj->oid));
                i++;
        }
  
@@@ -727,7 -768,7 +768,7 @@@ static void handle_tag(const char *name
                                               "\n-----BEGIN PGP SIGNATURE-----\n");
                if (signature)
                        switch(signed_tag_mode) {
-                       case ABORT:
+                       case SIGNED_TAG_ABORT:
                                die("encountered signed tag %s; use "
                                    "--signed-tags=<mode> to handle it",
                                    oid_to_hex(&tag->object.oid));
        tagged_mark = get_object_mark(tagged);
        if (!tagged_mark) {
                switch(tag_of_filtered_mode) {
-               case ABORT:
+               case TAG_FILTERING_ABORT:
                        die("tag %s tags unexported object; use "
                            "--tag-of-filtered-object=<mode> to handle it",
                            oid_to_hex(&tag->object.oid));
                                    oid_to_hex(&tag->object.oid),
                                    type_name(tagged->type));
                        }
-                       p = (struct commit *)tagged;
-                       for (;;) {
-                               if (p->parents && p->parents->next)
-                                       break;
-                               if (p->object.flags & UNINTERESTING)
-                                       break;
-                               if (!(p->object.flags & TREESAME))
-                                       break;
-                               if (!p->parents)
-                                       die("can't find replacement commit for tag %s",
-                                            oid_to_hex(&tag->object.oid));
-                               p = p->parents->item;
+                       p = rewrite_commit((struct commit *)tagged);
+                       if (!p) {
+                               printf("reset %s\nfrom %s\n\n",
+                                      name, oid_to_hex(&null_oid));
+                               free(buf);
+                               return;
                        }
                        tagged_mark = get_object_mark(&p->object);
                }
  
        if (starts_with(name, "refs/tags/"))
                name += 10;
-       printf("tag %s\nfrom :%d\n%.*s%sdata %d\n%.*s\n",
-              name, tagged_mark,
+       printf("tag %s\nfrom :%d\n", name, tagged_mark);
+       if (show_original_ids)
+               printf("original-oid %s\n", oid_to_hex(&tag->object.oid));
+       printf("%.*s%sdata %d\n%.*s\n",
               (int)(tagger_end - tagger), tagger,
               tagger == tagger_end ? "" : "\n",
               (int)message_size, (int)message_size, message ? message : "");
@@@ -804,7 -841,7 +841,7 @@@ static struct commit *get_commit(struc
                /* handle nested tags */
                while (tag && tag->object.type == OBJ_TAG) {
                        parse_object(the_repository, &tag->object.oid);
-                       string_list_append(&extra_refs, full_name)->util = tag;
+                       string_list_append(&tag_refs, full_name)->util = tag;
                        tag = (struct tag *)tag->tagged;
                }
                if (!tag)
@@@ -863,25 -900,30 +900,30 @@@ static void get_tags_and_duplicates(str
                }
  
                /*
-                * This ref will not be updated through a commit, lets make
-                * sure it gets properly updated eventually.
+                * Make sure this ref gets properly updated eventually, whether
+                * through a commit or manually at the end.
                 */
-               if (*revision_sources_at(&revision_sources, commit) ||
-                   commit->object.flags & SHOWN)
+               if (e->item->type != OBJ_TAG)
                        string_list_append(&extra_refs, full_name)->util = commit;
                if (!*revision_sources_at(&revision_sources, commit))
                        *revision_sources_at(&revision_sources, commit) = full_name;
        }
+       string_list_sort(&extra_refs);
+       string_list_remove_duplicates(&extra_refs, 0);
  }
  
- static void handle_tags_and_duplicates(void)
+ static void handle_tags_and_duplicates(struct string_list *extras)
  {
        struct commit *commit;
        int i;
  
-       for (i = extra_refs.nr - 1; i >= 0; i--) {
-               const char *name = extra_refs.items[i].string;
-               struct object *object = extra_refs.items[i].util;
+       for (i = extras->nr - 1; i >= 0; i--) {
+               const char *name = extras->items[i].string;
+               struct object *object = extras->items[i].util;
+               int mark;
                switch (object->type) {
                case OBJ_TAG:
                        handle_tag(name, (struct tag *)object);
                        if (anonymize)
                                name = anonymize_refname(name);
                        /* create refs pointing to already seen commits */
-                       commit = (struct commit *)object;
-                       printf("reset %s\nfrom :%d\n\n", name,
-                              get_object_mark(&commit->object));
+                       commit = rewrite_commit((struct commit *)object);
+                       if (!commit) {
+                               /*
+                                * Neither this object nor any of its
+                                * ancestors touch any relevant paths, so
+                                * it has been filtered to nothing.  Delete
+                                * it.
+                                */
+                               printf("reset %s\nfrom %s\n\n",
+                                      name, oid_to_hex(&null_oid));
+                               continue;
+                       }
+                       mark = get_object_mark(&commit->object);
+                       if (!mark) {
+                               /*
+                                * Getting here means we have a commit which
+                                * was excluded by a negative refspec (e.g.
+                                * fast-export ^master master).  If we are
+                                * referencing excluded commits, set the ref
+                                * to the exact commit.  Otherwise, the user
+                                * wants the branch exported but every commit
+                                * in its history to be deleted, which basically
+                                * just means deletion of the ref.
+                                */
+                               if (!reference_excluded_commits) {
+                                       /* delete the ref */
+                                       printf("reset %s\nfrom %s\n\n",
+                                              name, oid_to_hex(&null_oid));
+                                       continue;
+                               }
+                               /* set ref to commit using oid, not mark */
+                               printf("reset %s\nfrom %s\n\n", name,
+                                      oid_to_hex(&commit->object.oid));
+                               continue;
+                       }
+                       printf("reset %s\nfrom :%d\n\n", name, mark
+                              );
                        show_progress();
                        break;
                }
@@@ -988,7 -1066,7 +1066,7 @@@ static void handle_deletes(void
                        continue;
  
                printf("reset %s\nfrom %s\n\n",
-                               refspec->dst, sha1_to_hex(null_sha1));
+                               refspec->dst, oid_to_hex(&null_oid));
        }
  }
  
@@@ -1024,6 -1102,11 +1102,11 @@@ int cmd_fast_export(int argc, const cha
                OPT_STRING_LIST(0, "refspec", &refspecs_list, N_("refspec"),
                             N_("Apply refspec to exported refs")),
                OPT_BOOL(0, "anonymize", &anonymize, N_("anonymize output")),
+               OPT_BOOL(0, "reference-excluded-parents",
+                        &reference_excluded_commits, N_("Reference parents which are not in fast-export stream by object id")),
+               OPT_BOOL(0, "show-original-ids", &show_original_ids,
+                           N_("Show original object ids of blobs/commits")),
                OPT_END()
        };
  
                }
        }
  
-       handle_tags_and_duplicates();
+       handle_tags_and_duplicates(&extra_refs);
+       handle_tags_and_duplicates(&tag_refs);
        handle_deletes();
  
        if (export_filename && lastimportid != last_idnum)
diff --combined fast-import.c
index 69886687ce95fd7dfe5363d0c776e21241b6b830,71b6cba00f96af4abb2dd628344d78be9640ee88..b7ba755c2b88df35d5e50272d7b9341134625c56
@@@ -1,157 -1,3 +1,3 @@@
- /*
- (See Documentation/git-fast-import.txt for maintained documentation.)
- Format of STDIN stream:
-   stream ::= cmd*;
-   cmd ::= new_blob
-         | new_commit
-         | new_tag
-         | reset_branch
-         | checkpoint
-         | progress
-         ;
-   new_blob ::= 'blob' lf
-     mark?
-     file_content;
-   file_content ::= data;
-   new_commit ::= 'commit' sp ref_str lf
-     mark?
-     ('author' (sp name)? sp '<' email '>' sp when lf)?
-     'committer' (sp name)? sp '<' email '>' sp when lf
-     commit_msg
-     ('from' sp commit-ish lf)?
-     ('merge' sp commit-ish lf)*
-     (file_change | ls)*
-     lf?;
-   commit_msg ::= data;
-   ls ::= 'ls' sp '"' quoted(path) '"' lf;
-   file_change ::= file_clr
-     | file_del
-     | file_rnm
-     | file_cpy
-     | file_obm
-     | file_inm;
-   file_clr ::= 'deleteall' lf;
-   file_del ::= 'D' sp path_str lf;
-   file_rnm ::= 'R' sp path_str sp path_str lf;
-   file_cpy ::= 'C' sp path_str sp path_str lf;
-   file_obm ::= 'M' sp mode sp (hexsha1 | idnum) sp path_str lf;
-   file_inm ::= 'M' sp mode sp 'inline' sp path_str lf
-     data;
-   note_obm ::= 'N' sp (hexsha1 | idnum) sp commit-ish lf;
-   note_inm ::= 'N' sp 'inline' sp commit-ish lf
-     data;
-   new_tag ::= 'tag' sp tag_str lf
-     'from' sp commit-ish lf
-     ('tagger' (sp name)? sp '<' email '>' sp when lf)?
-     tag_msg;
-   tag_msg ::= data;
-   reset_branch ::= 'reset' sp ref_str lf
-     ('from' sp commit-ish lf)?
-     lf?;
-   checkpoint ::= 'checkpoint' lf
-     lf?;
-   progress ::= 'progress' sp not_lf* lf
-     lf?;
-      # note: the first idnum in a stream should be 1 and subsequent
-      # idnums should not have gaps between values as this will cause
-      # the stream parser to reserve space for the gapped values.  An
-      # idnum can be updated in the future to a new object by issuing
-      # a new mark directive with the old idnum.
-      #
-   mark ::= 'mark' sp idnum lf;
-   data ::= (delimited_data | exact_data)
-     lf?;
-     # note: delim may be any string but must not contain lf.
-     # data_line may contain any data but must not be exactly
-     # delim.
-   delimited_data ::= 'data' sp '<<' delim lf
-     (data_line lf)*
-     delim lf;
-      # note: declen indicates the length of binary_data in bytes.
-      # declen does not include the lf preceding the binary data.
-      #
-   exact_data ::= 'data' sp declen lf
-     binary_data;
-      # note: quoted strings are C-style quoting supporting \c for
-      # common escapes of 'c' (e..g \n, \t, \\, \") or \nnn where nnn
-      # is the signed byte value in octal.  Note that the only
-      # characters which must actually be escaped to protect the
-      # stream formatting is: \, " and LF.  Otherwise these values
-      # are UTF8.
-      #
-   commit-ish  ::= (ref_str | hexsha1 | sha1exp_str | idnum);
-   ref_str     ::= ref;
-   sha1exp_str ::= sha1exp;
-   tag_str     ::= tag;
-   path_str    ::= path    | '"' quoted(path)    '"' ;
-   mode        ::= '100644' | '644'
-                 | '100755' | '755'
-                 | '120000'
-                 ;
-   declen ::= # unsigned 32 bit value, ascii base10 notation;
-   bigint ::= # unsigned integer value, ascii base10 notation;
-   binary_data ::= # file content, not interpreted;
-   when         ::= raw_when | rfc2822_when;
-   raw_when     ::= ts sp tz;
-   rfc2822_when ::= # Valid RFC 2822 date and time;
-   sp ::= # ASCII space character;
-   lf ::= # ASCII newline (LF) character;
-      # note: a colon (':') must precede the numerical value assigned to
-      # an idnum.  This is to distinguish it from a ref or tag name as
-      # GIT does not permit ':' in ref or tag strings.
-      #
-   idnum   ::= ':' bigint;
-   path    ::= # GIT style file path, e.g. "a/b/c";
-   ref     ::= # GIT ref name, e.g. "refs/heads/MOZ_GECKO_EXPERIMENT";
-   tag     ::= # GIT tag name, e.g. "FIREFOX_1_5";
-   sha1exp ::= # Any valid GIT SHA1 expression;
-   hexsha1 ::= # SHA1 in hexadecimal format;
-      # note: name and email are UTF8 strings, however name must not
-      # contain '<' or lf and email must not contain any of the
-      # following: '<', '>', lf.
-      #
-   name  ::= # valid GIT author/committer name;
-   email ::= # valid GIT author/committer email;
-   ts    ::= # time since the epoch in seconds, ascii base10 notation;
-   tz    ::= # GIT style timezone;
-      # note: comments, get-mark, ls-tree, and cat-blob requests may
-      # appear anywhere in the input, except within a data command. Any
-      # form of the data command always escapes the related input from
-      # comment processing.
-      #
-      # In case it is not clear, the '#' that starts the comment
-      # must be the first character on that line (an lf
-      # preceded it).
-      #
-   get_mark ::= 'get-mark' sp idnum lf;
-   cat_blob ::= 'cat-blob' sp (hexsha1 | idnum) lf;
-   ls_tree  ::= 'ls' sp (hexsha1 | idnum) sp path_str lf;
-   comment ::= '#' not_lf* lf;
-   not_lf  ::= # Any byte that is not ASCII newline (LF);
- */
  #include "builtin.h"
  #include "cache.h"
  #include "repository.h"
@@@ -1968,6 -1814,13 +1814,13 @@@ static void parse_mark(void
                next_mark = 0;
  }
  
+ static void parse_original_identifier(void)
+ {
+       const char *v;
+       if (skip_prefix(command_buf.buf, "original-oid ", &v))
+               read_next_command();
+ }
  static int parse_data(struct strbuf *sb, uintmax_t limit, uintmax_t *len_res)
  {
        const char *data;
@@@ -2110,6 -1963,7 +1963,7 @@@ static void parse_new_blob(void
  {
        read_next_command();
        parse_mark();
+       parse_original_identifier();
        parse_and_store_blob(&last_blob, NULL, next_mark);
  }
  
@@@ -2733,6 -2587,7 +2587,7 @@@ static void parse_new_commit(const cha
  
        read_next_command();
        parse_mark();
+       parse_original_identifier();
        if (skip_prefix(command_buf.buf, "author ", &v)) {
                author = parse_ident(v);
                read_next_command();
@@@ -2865,6 -2720,9 +2720,9 @@@ static void parse_new_tag(const char *a
                die("Invalid ref name or SHA1 expression: %s", from);
        read_next_command();
  
+       /* original-oid ... */
+       parse_original_identifier();
        /* tagger ... */
        if (skip_prefix(command_buf.buf, "tagger ", &v)) {
                tagger = parse_ident(v);
@@@ -2955,8 -2813,8 +2813,8 @@@ static void cat_blob(struct object_entr
                die("Object %s is a %s but a blob was expected.",
                    oid_to_hex(oid), type_name(type));
        strbuf_reset(&line);
 -      strbuf_addf(&line, "%s %s %lu\n", oid_to_hex(oid),
 -                                              type_name(type), size);
 +      strbuf_addf(&line, "%s %s %"PRIuMAX"\n", oid_to_hex(oid),
 +                  type_name(type), (uintmax_t)size);
        cat_blob_write(line.buf, line.len);
        strbuf_release(&line);
        cat_blob_write(buf, size);