From: Junio C Hamano Date: Fri, 4 Jan 2019 21:33:33 +0000 (-0800) Subject: Merge branch 'en/fast-export-import' X-Git-Tag: v2.21.0-rc0~130 X-Git-Url: https://git.lorimer.id.au/gitweb.git/diff_plain/4d59753227d6f86dec2b108704bc04e727c5347f?ds=inline;hp=-c Merge branch 'en/fast-export-import' Small fixes and features for fast-export and fast-import, mostly on the fast-export side. * en/fast-export-import: fast-export: add a --show-original-ids option to show original names fast-import: remove unmaintained duplicate documentation fast-export: add --reference-excluded-parents option fast-export: ensure we export requested refs fast-export: when using paths, avoid corrupt stream with non-existent mark fast-export: move commit rewriting logic into a function for reuse fast-export: avoid dying when filtering by paths and old tags exist fast-export: use value from correct enum git-fast-export.txt: clarify misleading documentation about rev-list args git-fast-import.txt: fix documentation for --quiet option fast-export: convert sha1 to oid --- 4d59753227d6f86dec2b108704bc04e727c5347f diff --combined builtin/fast-export.c index 5790f0d554,36c2575de5..9e283482ef --- a/builtin/fast-export.c +++ b/builtin/fast-export.c @@@ -31,13 -31,16 +31,16 @@@ static const char *fast_export_usage[] }; static int progress; - static enum { ABORT, VERBATIM, WARN, WARN_STRIP, STRIP } signed_tag_mode = ABORT; - static enum { ERROR, DROP, REWRITE } tag_of_filtered_mode = ERROR; + static enum { SIGNED_TAG_ABORT, VERBATIM, WARN, WARN_STRIP, STRIP } signed_tag_mode = SIGNED_TAG_ABORT; + static enum { TAG_FILTERING_ABORT, DROP, REWRITE } tag_of_filtered_mode = TAG_FILTERING_ABORT; static int fake_missing_tagger; static int use_done_feature; static int no_data; static int full_tree; + static int reference_excluded_commits; + static int show_original_ids; static struct string_list extra_refs = STRING_LIST_INIT_NODUP; + static struct string_list tag_refs = STRING_LIST_INIT_NODUP; static struct refspec refspecs = REFSPEC_INIT_FETCH; static int anonymize; static struct revision_sources revision_sources; @@@ -46,7 -49,7 +49,7 @@@ static int parse_opt_signed_tag_mode(co const char *arg, int unset) { if (unset || !strcmp(arg, "abort")) - signed_tag_mode = ABORT; + signed_tag_mode = SIGNED_TAG_ABORT; else if (!strcmp(arg, "verbatim") || !strcmp(arg, "ignore")) signed_tag_mode = VERBATIM; else if (!strcmp(arg, "warn")) @@@ -64,7 -67,7 +67,7 @@@ static int parse_opt_tag_of_filtered_mo const char *arg, int unset) { if (unset || !strcmp(arg, "abort")) - tag_of_filtered_mode = ERROR; + tag_of_filtered_mode = TAG_FILTERING_ABORT; else if (!strcmp(arg, "drop")) tag_of_filtered_mode = DROP; else if (!strcmp(arg, "rewrite")) @@@ -187,6 -190,22 +190,22 @@@ static int get_object_mark(struct objec return ptr_to_mark(decoration); } + static struct commit *rewrite_commit(struct commit *p) + { + for (;;) { + if (p->parents && p->parents->next) + break; + if (p->object.flags & UNINTERESTING) + break; + if (!(p->object.flags & TREESAME)) + break; + if (!p->parents) + return NULL; + p = p->parents->item; + } + return p; + } + static void show_progress(void) { static int counter = 0; @@@ -243,7 -262,7 +262,7 @@@ static void export_blob(const struct ob if (!buf) die("could not read blob %s", oid_to_hex(oid)); if (check_object_signature(oid, buf, size, type_name(type)) < 0) - die("sha1 mismatch in blob %s", oid_to_hex(oid)); + die("oid mismatch in blob %s", oid_to_hex(oid)); object = parse_object_buffer(the_repository, oid, type, size, buf, &eaten); } @@@ -253,7 -272,10 +272,10 @@@ mark_next_object(object); - printf("blob\nmark :%"PRIu32"\ndata %"PRIuMAX"\n", last_idnum, (uintmax_t)size); + printf("blob\nmark :%"PRIu32"\n", last_idnum); + if (show_original_ids) + printf("original-oid %s\n", oid_to_hex(oid)); - printf("data %lu\n", size); ++ printf("data %"PRIuMAX"\n", (uintmax_t)size); if (size && fwrite(buf, size, 1, stdout) != 1) die_errno("could not write blob '%s'", oid_to_hex(oid)); printf("\n"); @@@ -330,17 -352,18 +352,18 @@@ static void print_path(const char *path static void *generate_fake_oid(const void *old, size_t *len) { - static uint32_t counter = 1; /* avoid null sha1 */ - unsigned char *out = xcalloc(GIT_SHA1_RAWSZ, 1); - put_be32(out + GIT_SHA1_RAWSZ - 4, counter++); + static uint32_t counter = 1; /* avoid null oid */ + const unsigned hashsz = the_hash_algo->rawsz; + unsigned char *out = xcalloc(hashsz, 1); + put_be32(out + hashsz - 4, counter++); return out; } - static const unsigned char *anonymize_sha1(const struct object_id *oid) + static const struct object_id *anonymize_oid(const struct object_id *oid) { - static struct hashmap sha1s; - size_t len = GIT_SHA1_RAWSZ; - return anonymize_mem(&sha1s, generate_fake_oid, oid, &len); + static struct hashmap objs; + size_t len = the_hash_algo->rawsz; + return anonymize_mem(&objs, generate_fake_oid, oid, &len); } static void show_filemodify(struct diff_queue_struct *q, @@@ -399,9 -422,9 +422,9 @@@ */ if (no_data || S_ISGITLINK(spec->mode)) printf("M %06o %s ", spec->mode, - sha1_to_hex(anonymize ? - anonymize_sha1(&spec->oid) : - spec->oid.hash)); + oid_to_hex(anonymize ? + anonymize_oid(&spec->oid) : + &spec->oid)); else { struct object *object = lookup_object(the_repository, spec->oid.hash); @@@ -579,7 -602,8 +602,8 @@@ static void handle_commit(struct commi message += 2; if (commit->parents && - get_object_mark(&commit->parents->item->object) != 0 && + (get_object_mark(&commit->parents->item->object) != 0 || + reference_excluded_commits) && !full_tree) { parse_commit_or_die(commit->parents->item); diff_tree_oid(get_commit_tree_oid(commit->parents->item), @@@ -595,6 -619,13 +619,13 @@@ export_blob(&diff_queued_diff.queue[i]->two->oid); refname = *revision_sources_at(&revision_sources, commit); + /* + * FIXME: string_list_remove() below for each ref is overall + * O(N^2). Compared to a history walk and diffing trees, this is + * just lost in the noise in practice. However, theoretically a + * repo may have enough refs for this to become slow. + */ + string_list_remove(&extra_refs, refname, 0); if (anonymize) { refname = anonymize_refname(refname); anonymize_ident_line(&committer, &committer_end); @@@ -608,8 -639,10 +639,10 @@@ reencoded = reencode_string(message, "UTF-8", encoding); if (!commit->parents) printf("reset %s\n", refname); - printf("commit %s\nmark :%"PRIu32"\n%.*s\n%.*s\ndata %u\n%s", - refname, last_idnum, + printf("commit %s\nmark :%"PRIu32"\n", refname, last_idnum); + if (show_original_ids) + printf("original-oid %s\n", oid_to_hex(&commit->object.oid)); + printf("%.*s\n%.*s\ndata %u\n%s", (int)(author_end - author), author, (int)(committer_end - committer), committer, (unsigned)(reencoded @@@ -620,13 -653,21 +653,21 @@@ unuse_commit_buffer(commit, commit_buffer); for (i = 0, p = commit->parents; p; p = p->next) { - int mark = get_object_mark(&p->item->object); - if (!mark) + struct object *obj = &p->item->object; + int mark = get_object_mark(obj); + + if (!mark && !reference_excluded_commits) continue; if (i == 0) - printf("from :%d\n", mark); + printf("from "); + else + printf("merge "); + if (mark) + printf(":%d\n", mark); else - printf("merge :%d\n", mark); + printf("%s\n", oid_to_hex(anonymize ? + anonymize_oid(&obj->oid) : + &obj->oid)); i++; } @@@ -727,7 -768,7 +768,7 @@@ static void handle_tag(const char *name "\n-----BEGIN PGP SIGNATURE-----\n"); if (signature) switch(signed_tag_mode) { - case ABORT: + case SIGNED_TAG_ABORT: die("encountered signed tag %s; use " "--signed-tags= to handle it", oid_to_hex(&tag->object.oid)); @@@ -752,7 -793,7 +793,7 @@@ tagged_mark = get_object_mark(tagged); if (!tagged_mark) { switch(tag_of_filtered_mode) { - case ABORT: + case TAG_FILTERING_ABORT: die("tag %s tags unexported object; use " "--tag-of-filtered-object= to handle it", oid_to_hex(&tag->object.oid)); @@@ -766,18 -807,12 +807,12 @@@ oid_to_hex(&tag->object.oid), type_name(tagged->type)); } - p = (struct commit *)tagged; - for (;;) { - if (p->parents && p->parents->next) - break; - if (p->object.flags & UNINTERESTING) - break; - if (!(p->object.flags & TREESAME)) - break; - if (!p->parents) - die("can't find replacement commit for tag %s", - oid_to_hex(&tag->object.oid)); - p = p->parents->item; + p = rewrite_commit((struct commit *)tagged); + if (!p) { + printf("reset %s\nfrom %s\n\n", + name, oid_to_hex(&null_oid)); + free(buf); + return; } tagged_mark = get_object_mark(&p->object); } @@@ -785,8 -820,10 +820,10 @@@ if (starts_with(name, "refs/tags/")) name += 10; - printf("tag %s\nfrom :%d\n%.*s%sdata %d\n%.*s\n", - name, tagged_mark, + printf("tag %s\nfrom :%d\n", name, tagged_mark); + if (show_original_ids) + printf("original-oid %s\n", oid_to_hex(&tag->object.oid)); + printf("%.*s%sdata %d\n%.*s\n", (int)(tagger_end - tagger), tagger, tagger == tagger_end ? "" : "\n", (int)message_size, (int)message_size, message ? message : ""); @@@ -804,7 -841,7 +841,7 @@@ static struct commit *get_commit(struc /* handle nested tags */ while (tag && tag->object.type == OBJ_TAG) { parse_object(the_repository, &tag->object.oid); - string_list_append(&extra_refs, full_name)->util = tag; + string_list_append(&tag_refs, full_name)->util = tag; tag = (struct tag *)tag->tagged; } if (!tag) @@@ -863,25 -900,30 +900,30 @@@ static void get_tags_and_duplicates(str } /* - * This ref will not be updated through a commit, lets make - * sure it gets properly updated eventually. + * Make sure this ref gets properly updated eventually, whether + * through a commit or manually at the end. */ - if (*revision_sources_at(&revision_sources, commit) || - commit->object.flags & SHOWN) + if (e->item->type != OBJ_TAG) string_list_append(&extra_refs, full_name)->util = commit; + if (!*revision_sources_at(&revision_sources, commit)) *revision_sources_at(&revision_sources, commit) = full_name; } + + string_list_sort(&extra_refs); + string_list_remove_duplicates(&extra_refs, 0); } - static void handle_tags_and_duplicates(void) + static void handle_tags_and_duplicates(struct string_list *extras) { struct commit *commit; int i; - for (i = extra_refs.nr - 1; i >= 0; i--) { - const char *name = extra_refs.items[i].string; - struct object *object = extra_refs.items[i].util; + for (i = extras->nr - 1; i >= 0; i--) { + const char *name = extras->items[i].string; + struct object *object = extras->items[i].util; + int mark; + switch (object->type) { case OBJ_TAG: handle_tag(name, (struct tag *)object); @@@ -890,9 -932,45 +932,45 @@@ if (anonymize) name = anonymize_refname(name); /* create refs pointing to already seen commits */ - commit = (struct commit *)object; - printf("reset %s\nfrom :%d\n\n", name, - get_object_mark(&commit->object)); + commit = rewrite_commit((struct commit *)object); + if (!commit) { + /* + * Neither this object nor any of its + * ancestors touch any relevant paths, so + * it has been filtered to nothing. Delete + * it. + */ + printf("reset %s\nfrom %s\n\n", + name, oid_to_hex(&null_oid)); + continue; + } + + mark = get_object_mark(&commit->object); + if (!mark) { + /* + * Getting here means we have a commit which + * was excluded by a negative refspec (e.g. + * fast-export ^master master). If we are + * referencing excluded commits, set the ref + * to the exact commit. Otherwise, the user + * wants the branch exported but every commit + * in its history to be deleted, which basically + * just means deletion of the ref. + */ + if (!reference_excluded_commits) { + /* delete the ref */ + printf("reset %s\nfrom %s\n\n", + name, oid_to_hex(&null_oid)); + continue; + } + /* set ref to commit using oid, not mark */ + printf("reset %s\nfrom %s\n\n", name, + oid_to_hex(&commit->object.oid)); + continue; + } + + printf("reset %s\nfrom :%d\n\n", name, mark + ); show_progress(); break; } @@@ -988,7 -1066,7 +1066,7 @@@ static void handle_deletes(void continue; printf("reset %s\nfrom %s\n\n", - refspec->dst, sha1_to_hex(null_sha1)); + refspec->dst, oid_to_hex(&null_oid)); } } @@@ -1024,6 -1102,11 +1102,11 @@@ int cmd_fast_export(int argc, const cha OPT_STRING_LIST(0, "refspec", &refspecs_list, N_("refspec"), N_("Apply refspec to exported refs")), OPT_BOOL(0, "anonymize", &anonymize, N_("anonymize output")), + OPT_BOOL(0, "reference-excluded-parents", + &reference_excluded_commits, N_("Reference parents which are not in fast-export stream by object id")), + OPT_BOOL(0, "show-original-ids", &show_original_ids, + N_("Show original object ids of blobs/commits")), + OPT_END() }; @@@ -1080,7 -1163,8 +1163,8 @@@ } } - handle_tags_and_duplicates(); + handle_tags_and_duplicates(&extra_refs); + handle_tags_and_duplicates(&tag_refs); handle_deletes(); if (export_filename && lastimportid != last_idnum) diff --combined fast-import.c index 69886687ce,71b6cba00f..b7ba755c2b --- a/fast-import.c +++ b/fast-import.c @@@ -1,157 -1,3 +1,3 @@@ - /* - (See Documentation/git-fast-import.txt for maintained documentation.) - Format of STDIN stream: - - stream ::= cmd*; - - cmd ::= new_blob - | new_commit - | new_tag - | reset_branch - | checkpoint - | progress - ; - - new_blob ::= 'blob' lf - mark? - file_content; - file_content ::= data; - - new_commit ::= 'commit' sp ref_str lf - mark? - ('author' (sp name)? sp '<' email '>' sp when lf)? - 'committer' (sp name)? sp '<' email '>' sp when lf - commit_msg - ('from' sp commit-ish lf)? - ('merge' sp commit-ish lf)* - (file_change | ls)* - lf?; - commit_msg ::= data; - - ls ::= 'ls' sp '"' quoted(path) '"' lf; - - file_change ::= file_clr - | file_del - | file_rnm - | file_cpy - | file_obm - | file_inm; - file_clr ::= 'deleteall' lf; - file_del ::= 'D' sp path_str lf; - file_rnm ::= 'R' sp path_str sp path_str lf; - file_cpy ::= 'C' sp path_str sp path_str lf; - file_obm ::= 'M' sp mode sp (hexsha1 | idnum) sp path_str lf; - file_inm ::= 'M' sp mode sp 'inline' sp path_str lf - data; - note_obm ::= 'N' sp (hexsha1 | idnum) sp commit-ish lf; - note_inm ::= 'N' sp 'inline' sp commit-ish lf - data; - - new_tag ::= 'tag' sp tag_str lf - 'from' sp commit-ish lf - ('tagger' (sp name)? sp '<' email '>' sp when lf)? - tag_msg; - tag_msg ::= data; - - reset_branch ::= 'reset' sp ref_str lf - ('from' sp commit-ish lf)? - lf?; - - checkpoint ::= 'checkpoint' lf - lf?; - - progress ::= 'progress' sp not_lf* lf - lf?; - - # note: the first idnum in a stream should be 1 and subsequent - # idnums should not have gaps between values as this will cause - # the stream parser to reserve space for the gapped values. An - # idnum can be updated in the future to a new object by issuing - # a new mark directive with the old idnum. - # - mark ::= 'mark' sp idnum lf; - data ::= (delimited_data | exact_data) - lf?; - - # note: delim may be any string but must not contain lf. - # data_line may contain any data but must not be exactly - # delim. - delimited_data ::= 'data' sp '<<' delim lf - (data_line lf)* - delim lf; - - # note: declen indicates the length of binary_data in bytes. - # declen does not include the lf preceding the binary data. - # - exact_data ::= 'data' sp declen lf - binary_data; - - # note: quoted strings are C-style quoting supporting \c for - # common escapes of 'c' (e..g \n, \t, \\, \") or \nnn where nnn - # is the signed byte value in octal. Note that the only - # characters which must actually be escaped to protect the - # stream formatting is: \, " and LF. Otherwise these values - # are UTF8. - # - commit-ish ::= (ref_str | hexsha1 | sha1exp_str | idnum); - ref_str ::= ref; - sha1exp_str ::= sha1exp; - tag_str ::= tag; - path_str ::= path | '"' quoted(path) '"' ; - mode ::= '100644' | '644' - | '100755' | '755' - | '120000' - ; - - declen ::= # unsigned 32 bit value, ascii base10 notation; - bigint ::= # unsigned integer value, ascii base10 notation; - binary_data ::= # file content, not interpreted; - - when ::= raw_when | rfc2822_when; - raw_when ::= ts sp tz; - rfc2822_when ::= # Valid RFC 2822 date and time; - - sp ::= # ASCII space character; - lf ::= # ASCII newline (LF) character; - - # note: a colon (':') must precede the numerical value assigned to - # an idnum. This is to distinguish it from a ref or tag name as - # GIT does not permit ':' in ref or tag strings. - # - idnum ::= ':' bigint; - path ::= # GIT style file path, e.g. "a/b/c"; - ref ::= # GIT ref name, e.g. "refs/heads/MOZ_GECKO_EXPERIMENT"; - tag ::= # GIT tag name, e.g. "FIREFOX_1_5"; - sha1exp ::= # Any valid GIT SHA1 expression; - hexsha1 ::= # SHA1 in hexadecimal format; - - # note: name and email are UTF8 strings, however name must not - # contain '<' or lf and email must not contain any of the - # following: '<', '>', lf. - # - name ::= # valid GIT author/committer name; - email ::= # valid GIT author/committer email; - ts ::= # time since the epoch in seconds, ascii base10 notation; - tz ::= # GIT style timezone; - - # note: comments, get-mark, ls-tree, and cat-blob requests may - # appear anywhere in the input, except within a data command. Any - # form of the data command always escapes the related input from - # comment processing. - # - # In case it is not clear, the '#' that starts the comment - # must be the first character on that line (an lf - # preceded it). - # - - get_mark ::= 'get-mark' sp idnum lf; - cat_blob ::= 'cat-blob' sp (hexsha1 | idnum) lf; - ls_tree ::= 'ls' sp (hexsha1 | idnum) sp path_str lf; - - comment ::= '#' not_lf* lf; - not_lf ::= # Any byte that is not ASCII newline (LF); - */ - #include "builtin.h" #include "cache.h" #include "repository.h" @@@ -1968,6 -1814,13 +1814,13 @@@ static void parse_mark(void next_mark = 0; } + static void parse_original_identifier(void) + { + const char *v; + if (skip_prefix(command_buf.buf, "original-oid ", &v)) + read_next_command(); + } + static int parse_data(struct strbuf *sb, uintmax_t limit, uintmax_t *len_res) { const char *data; @@@ -2110,6 -1963,7 +1963,7 @@@ static void parse_new_blob(void { read_next_command(); parse_mark(); + parse_original_identifier(); parse_and_store_blob(&last_blob, NULL, next_mark); } @@@ -2733,6 -2587,7 +2587,7 @@@ static void parse_new_commit(const cha read_next_command(); parse_mark(); + parse_original_identifier(); if (skip_prefix(command_buf.buf, "author ", &v)) { author = parse_ident(v); read_next_command(); @@@ -2865,6 -2720,9 +2720,9 @@@ static void parse_new_tag(const char *a die("Invalid ref name or SHA1 expression: %s", from); read_next_command(); + /* original-oid ... */ + parse_original_identifier(); + /* tagger ... */ if (skip_prefix(command_buf.buf, "tagger ", &v)) { tagger = parse_ident(v); @@@ -2955,8 -2813,8 +2813,8 @@@ static void cat_blob(struct object_entr die("Object %s is a %s but a blob was expected.", oid_to_hex(oid), type_name(type)); strbuf_reset(&line); - strbuf_addf(&line, "%s %s %lu\n", oid_to_hex(oid), - type_name(type), size); + strbuf_addf(&line, "%s %s %"PRIuMAX"\n", oid_to_hex(oid), + type_name(type), (uintmax_t)size); cat_blob_write(line.buf, line.len); strbuf_release(&line); cat_blob_write(buf, size);