Merge branch 'ab/fsck-skiplist'
authorJunio C Hamano <gitster@pobox.com>
Wed, 10 Oct 2018 03:37:16 +0000 (12:37 +0900)
committerJunio C Hamano <gitster@pobox.com>
Wed, 10 Oct 2018 03:37:16 +0000 (12:37 +0900)
Update fsck.skipList implementation and documentation.

* ab/fsck-skiplist:
fsck: support comments & empty lines in skipList
fsck: use oidset instead of oid_array for skipList
fsck: use strbuf_getline() to read skiplist file
fsck: add a performance test for skipList
fsck: add a performance test
fsck: document that skipList input must be unabbreviated
fsck: document and test commented & empty line skipList input
fsck: document and test sorted skipList input
fsck tests: add a test for no skipList input
fsck tests: setup of bogus commit object

Documentation/config.txt
fsck.c
fsck.h
t/perf/p1450-fsck.sh [new file with mode: 0755]
t/perf/p1451-fsck-skip-list.sh [new file with mode: 0755]
t/t5504-fetch-receive-strict.sh
index fa52b19dffcc515b76dc988088dd4cc2bd7c94f8..154683321343687f0fa769b193f2812ca0e30025 100644 (file)
@@ -1567,12 +1567,16 @@ doing the same for `receive.fsck.<msg-id>` and `fetch.fsck.<msg-id>`
 will only cause git to warn.
 
 fsck.skipList::
-       The path to a sorted list of object names (i.e. one SHA-1 per
+       The path to a list of object names (i.e. one unabbreviated SHA-1 per
        line) that are known to be broken in a non-fatal way and should
-       be ignored. This feature is useful when an established project
-       should be accepted despite early commits containing errors that
-       can be safely ignored such as invalid committer email addresses.
-       Note: corrupt objects cannot be skipped with this setting.
+       be ignored. On versions of Git 2.20 and later comments ('#'), empty
+       lines, and any leading and trailing whitespace is ignored. Everything
+       but a SHA-1 per line will error out on older versions.
++
+This feature is useful when an established project should be accepted
+despite early commits containing errors that can be safely ignored
+such as invalid committer email addresses.  Note: corrupt objects
+cannot be skipped with this setting.
 +
 Like `fsck.<msg-id>` this variable has corresponding
 `receive.fsck.skipList` and `fetch.fsck.skipList` variants.
@@ -1582,6 +1586,15 @@ Unlike variables like `color.ui` and `core.editor` the
 fall back on the `fsck.skipList` configuration if they aren't set. To
 uniformly configure the same fsck settings in different circumstances
 all three of them they must all set to the same values.
++
+Older versions of Git (before 2.20) documented that the object names
+list should be sorted. This was never a requirement, the object names
+could appear in any order, but when reading the list we tracked whether
+the list was sorted for the purposes of an internal binary search
+implementation, which could save itself some work with an already sorted
+list. Unless you had a humongous list there was no reason to go out of
+your way to pre-sort the list. After Git version 2.20 a hash implementation
+is used instead, so there's now no reason to pre-sort the list.
 
 gc.aggressiveDepth::
        The depth parameter used in the delta compression
diff --git a/fsck.c b/fsck.c
index 0f56977d6aa6499c06e31eddb8ed4afe484fcdc4..38624d251126ed5a75cd6b8e8b7adfddbb29b283 100644 (file)
--- a/fsck.c
+++ b/fsck.c
@@ -10,7 +10,6 @@
 #include "fsck.h"
 #include "refs.h"
 #include "utf8.h"
-#include "sha1-array.h"
 #include "decorate.h"
 #include "oidset.h"
 #include "packfile.h"
@@ -184,40 +183,37 @@ static int fsck_msg_type(enum fsck_msg_id msg_id,
 
 static void init_skiplist(struct fsck_options *options, const char *path)
 {
-       static struct oid_array skiplist = OID_ARRAY_INIT;
-       int sorted, fd;
-       char buffer[GIT_MAX_HEXSZ + 1];
+       FILE *fp;
+       struct strbuf sb = STRBUF_INIT;
        struct object_id oid;
 
-       if (options->skiplist)
-               sorted = options->skiplist->sorted;
-       else {
-               sorted = 1;
-               options->skiplist = &skiplist;
-       }
-
-       fd = open(path, O_RDONLY);
-       if (fd < 0)
+       fp = fopen(path, "r");
+       if (!fp)
                die("Could not open skip list: %s", path);
-       for (;;) {
+       while (!strbuf_getline(&sb, fp)) {
                const char *p;
-               int result = read_in_full(fd, buffer, sizeof(buffer));
-               if (result < 0)
-                       die_errno("Could not read '%s'", path);
-               if (!result)
-                       break;
-               if (parse_oid_hex(buffer, &oid, &p) || *p != '\n')
-                       die("Invalid SHA-1: %s", buffer);
-               oid_array_append(&skiplist, &oid);
-               if (sorted && skiplist.nr > 1 &&
-                               oidcmp(&skiplist.oid[skiplist.nr - 2],
-                                      &oid) > 0)
-                       sorted = 0;
-       }
-       close(fd);
+               const char *hash;
 
-       if (sorted)
-               skiplist.sorted = 1;
+               /*
+                * Allow trailing comments, leading whitespace
+                * (including before commits), and empty or whitespace
+                * only lines.
+                */
+               hash = strchr(sb.buf, '#');
+               if (hash)
+                       strbuf_setlen(&sb, hash - sb.buf);
+               strbuf_trim(&sb);
+               if (!sb.len)
+                       continue;
+
+               if (parse_oid_hex(sb.buf, &oid, &p) || *p != '\0')
+                       die("Invalid SHA-1: %s", sb.buf);
+               oidset_insert(&options->skiplist, &oid);
+       }
+       if (ferror(fp))
+               die_errno("Could not read '%s'", path);
+       fclose(fp);
+       strbuf_release(&sb);
 }
 
 static int parse_msg_type(const char *str)
@@ -322,9 +318,7 @@ static void append_msg_id(struct strbuf *sb, const char *msg_id)
 
 static int object_on_skiplist(struct fsck_options *opts, struct object *obj)
 {
-       if (opts && opts->skiplist && obj)
-               return oid_array_lookup(opts->skiplist, &obj->oid) >= 0;
-       return 0;
+       return opts && obj && oidset_contains(&opts->skiplist, &obj->oid);
 }
 
 __attribute__((format (printf, 4, 5)))
diff --git a/fsck.h b/fsck.h
index 0c7e8c9428bbc808abf27ff5dfe77d8d7309b470..b95595ae5fee6c065c4f54ad66ddc0edb8824d37 100644 (file)
--- a/fsck.h
+++ b/fsck.h
@@ -1,6 +1,8 @@
 #ifndef GIT_FSCK_H
 #define GIT_FSCK_H
 
+#include "oidset.h"
+
 #define FSCK_ERROR 1
 #define FSCK_WARN 2
 #define FSCK_IGNORE 3
@@ -35,12 +37,12 @@ struct fsck_options {
        fsck_error error_func;
        unsigned strict:1;
        int *msg_type;
-       struct oid_array *skiplist;
+       struct oidset skiplist;
        struct decoration *object_names;
 };
 
-#define FSCK_OPTIONS_DEFAULT { NULL, fsck_error_function, 0, NULL }
-#define FSCK_OPTIONS_STRICT { NULL, fsck_error_function, 1, NULL }
+#define FSCK_OPTIONS_DEFAULT { NULL, fsck_error_function, 0, NULL, OIDSET_INIT }
+#define FSCK_OPTIONS_STRICT { NULL, fsck_error_function, 1, NULL, OIDSET_INIT }
 
 /* descend in all linked child objects
  * the return value is:
diff --git a/t/perf/p1450-fsck.sh b/t/perf/p1450-fsck.sh
new file mode 100755 (executable)
index 0000000..ae1b841
--- /dev/null
@@ -0,0 +1,13 @@
+#!/bin/sh
+
+test_description='Test fsck performance'
+
+. ./perf-lib.sh
+
+test_perf_large_repo
+
+test_perf 'fsck' '
+       git fsck
+'
+
+test_done
diff --git a/t/perf/p1451-fsck-skip-list.sh b/t/perf/p1451-fsck-skip-list.sh
new file mode 100755 (executable)
index 0000000..c2b97d2
--- /dev/null
@@ -0,0 +1,40 @@
+#!/bin/sh
+
+test_description='Test fsck skipList performance'
+
+. ./perf-lib.sh
+
+test_perf_fresh_repo
+
+n=1000000
+
+test_expect_success "setup $n bad commits" '
+       for i in $(test_seq 1 $n)
+       do
+               echo "commit refs/heads/master" &&
+               echo "committer C <c@example.com> 1234567890 +0000" &&
+               echo "data <<EOF" &&
+               echo "$i.Q." &&
+               echo "EOF"
+       done | q_to_nul | git fast-import
+'
+
+skip=0
+while test $skip -le $n
+do
+       test_expect_success "create skipList for $skip bad commits" '
+               git log --format=%H --max-count=$skip |
+               sort >skiplist
+       '
+
+       test_perf "fsck with $skip skipped bad commits" '
+               git -c fsck.skipList=skiplist fsck
+       '
+
+       case $skip in
+       0) skip=1 ;;
+       *) skip=${skip}0 ;;
+       esac
+done
+
+test_done
index 62f35698912d0655e44d53d50c7197b8200d2710..7bc706873c5b2341f6a0922cf2e4f79d34eee97c 100755 (executable)
@@ -133,6 +133,34 @@ committer Bugs Bunny <bugs@bun.ni> 1234567890 +0000
 This commit object intentionally broken
 EOF
 
+test_expect_success 'setup bogus commit' '
+       commit="$(git hash-object -t commit -w --stdin <bogus-commit)"
+'
+
+test_expect_success 'fsck with no skipList input' '
+       test_must_fail git fsck 2>err &&
+       test_i18ngrep "missingEmail" err
+'
+
+test_expect_success 'setup sorted and unsorted skipLists' '
+       cat >SKIP.unsorted <<-EOF &&
+       0000000000000000000000000000000000000004
+       0000000000000000000000000000000000000002
+       $commit
+       0000000000000000000000000000000000000001
+       0000000000000000000000000000000000000003
+       EOF
+       sort SKIP.unsorted >SKIP.sorted
+'
+
+test_expect_success 'fsck with sorted skipList' '
+       git -c fsck.skipList=SKIP.sorted fsck
+'
+
+test_expect_success 'fsck with unsorted skipList' '
+       git -c fsck.skipList=SKIP.unsorted fsck
+'
+
 test_expect_success 'fsck with invalid or bogus skipList input' '
        git -c fsck.skipList=/dev/null -c fsck.missingEmail=ignore fsck &&
        test_must_fail git -c fsck.skipList=does-not-exist -c fsck.missingEmail=ignore fsck 2>err &&
@@ -141,8 +169,47 @@ test_expect_success 'fsck with invalid or bogus skipList input' '
        test_i18ngrep "Invalid SHA-1: \[core\]" err
 '
 
+test_expect_success 'fsck with other accepted skipList input (comments & empty lines)' '
+       cat >SKIP.with-comment <<-EOF &&
+       # Some bad commit
+       0000000000000000000000000000000000000001
+       EOF
+       test_must_fail git -c fsck.skipList=SKIP.with-comment fsck 2>err-with-comment &&
+       test_i18ngrep "missingEmail" err-with-comment &&
+       cat >SKIP.with-empty-line <<-EOF &&
+       0000000000000000000000000000000000000001
+
+       0000000000000000000000000000000000000002
+       EOF
+       test_must_fail git -c fsck.skipList=SKIP.with-empty-line fsck 2>err-with-empty-line &&
+       test_i18ngrep "missingEmail" err-with-empty-line
+'
+
+test_expect_success 'fsck no garbage output from comments & empty lines errors' '
+       test_line_count = 1 err-with-comment &&
+       test_line_count = 1 err-with-empty-line
+'
+
+test_expect_success 'fsck with invalid abbreviated skipList input' '
+       echo $commit | test_copy_bytes 20 >SKIP.abbreviated &&
+       test_must_fail git -c fsck.skipList=SKIP.abbreviated fsck 2>err-abbreviated &&
+       test_i18ngrep "^fatal: Invalid SHA-1: " err-abbreviated
+'
+
+test_expect_success 'fsck with exhaustive accepted skipList input (various types of comments etc.)' '
+       >SKIP.exhaustive &&
+       echo "# A commented line" >>SKIP.exhaustive &&
+       echo "" >>SKIP.exhaustive &&
+       echo " " >>SKIP.exhaustive &&
+       echo " # Comment after whitespace" >>SKIP.exhaustive &&
+       echo "$commit # Our bad commit (with leading whitespace and trailing comment)" >>SKIP.exhaustive &&
+       echo "# Some bad commit (leading whitespace)" >>SKIP.exhaustive &&
+       echo "  0000000000000000000000000000000000000001" >>SKIP.exhaustive &&
+       git -c fsck.skipList=SKIP.exhaustive fsck 2>err &&
+       test_must_be_empty err
+'
+
 test_expect_success 'push with receive.fsck.skipList' '
-       commit="$(git hash-object -t commit -w --stdin <bogus-commit)" &&
        git push . $commit:refs/heads/bogus &&
        rm -rf dst &&
        git init dst &&
@@ -169,7 +236,6 @@ test_expect_success 'push with receive.fsck.skipList' '
 '
 
 test_expect_success 'fetch with fetch.fsck.skipList' '
-       commit="$(git hash-object -t commit -w --stdin <bogus-commit)" &&
        refspec=refs/heads/bogus:refs/heads/bogus &&
        git push . $commit:refs/heads/bogus &&
        rm -rf dst &&
@@ -204,7 +270,6 @@ test_expect_success 'fsck.<unknownmsg-id> dies' '
 '
 
 test_expect_success 'push with receive.fsck.missingEmail=warn' '
-       commit="$(git hash-object -t commit -w --stdin <bogus-commit)" &&
        git push . $commit:refs/heads/bogus &&
        rm -rf dst &&
        git init dst &&
@@ -232,7 +297,6 @@ test_expect_success 'push with receive.fsck.missingEmail=warn' '
 '
 
 test_expect_success 'fetch with fetch.fsck.missingEmail=warn' '
-       commit="$(git hash-object -t commit -w --stdin <bogus-commit)" &&
        refspec=refs/heads/bogus:refs/heads/bogus &&
        git push . $commit:refs/heads/bogus &&
        rm -rf dst &&