Update fsck.skipList implementation and documentation.
* ab/fsck-skiplist:
fsck: support comments & empty lines in skipList
fsck: use oidset instead of oid_array for skipList
fsck: use strbuf_getline() to read skiplist file
fsck: add a performance test for skipList
fsck: add a performance test
fsck: document that skipList input must be unabbreviated
fsck: document and test commented & empty line skipList input
fsck: document and test sorted skipList input
fsck tests: add a test for no skipList input
fsck tests: setup of bogus commit object
will only cause git to warn.
fsck.skipList::
- The path to a sorted list of object names (i.e. one SHA-1 per
+ The path to a list of object names (i.e. one unabbreviated SHA-1 per
line) that are known to be broken in a non-fatal way and should
- be ignored. This feature is useful when an established project
- should be accepted despite early commits containing errors that
- can be safely ignored such as invalid committer email addresses.
- Note: corrupt objects cannot be skipped with this setting.
+ be ignored. On versions of Git 2.20 and later comments ('#'), empty
+ lines, and any leading and trailing whitespace is ignored. Everything
+ but a SHA-1 per line will error out on older versions.
++
+This feature is useful when an established project should be accepted
+despite early commits containing errors that can be safely ignored
+such as invalid committer email addresses. Note: corrupt objects
+cannot be skipped with this setting.
+
Like `fsck.<msg-id>` this variable has corresponding
`receive.fsck.skipList` and `fetch.fsck.skipList` variants.
fall back on the `fsck.skipList` configuration if they aren't set. To
uniformly configure the same fsck settings in different circumstances
all three of them they must all set to the same values.
++
+Older versions of Git (before 2.20) documented that the object names
+list should be sorted. This was never a requirement, the object names
+could appear in any order, but when reading the list we tracked whether
+the list was sorted for the purposes of an internal binary search
+implementation, which could save itself some work with an already sorted
+list. Unless you had a humongous list there was no reason to go out of
+your way to pre-sort the list. After Git version 2.20 a hash implementation
+is used instead, so there's now no reason to pre-sort the list.
gc.aggressiveDepth::
The depth parameter used in the delta compression
#include "fsck.h"
#include "refs.h"
#include "utf8.h"
-#include "sha1-array.h"
#include "decorate.h"
#include "oidset.h"
#include "packfile.h"
static void init_skiplist(struct fsck_options *options, const char *path)
{
- static struct oid_array skiplist = OID_ARRAY_INIT;
- int sorted, fd;
- char buffer[GIT_MAX_HEXSZ + 1];
+ FILE *fp;
+ struct strbuf sb = STRBUF_INIT;
struct object_id oid;
- if (options->skiplist)
- sorted = options->skiplist->sorted;
- else {
- sorted = 1;
- options->skiplist = &skiplist;
- }
-
- fd = open(path, O_RDONLY);
- if (fd < 0)
+ fp = fopen(path, "r");
+ if (!fp)
die("Could not open skip list: %s", path);
- for (;;) {
+ while (!strbuf_getline(&sb, fp)) {
const char *p;
- int result = read_in_full(fd, buffer, sizeof(buffer));
- if (result < 0)
- die_errno("Could not read '%s'", path);
- if (!result)
- break;
- if (parse_oid_hex(buffer, &oid, &p) || *p != '\n')
- die("Invalid SHA-1: %s", buffer);
- oid_array_append(&skiplist, &oid);
- if (sorted && skiplist.nr > 1 &&
- oidcmp(&skiplist.oid[skiplist.nr - 2],
- &oid) > 0)
- sorted = 0;
- }
- close(fd);
+ const char *hash;
- if (sorted)
- skiplist.sorted = 1;
+ /*
+ * Allow trailing comments, leading whitespace
+ * (including before commits), and empty or whitespace
+ * only lines.
+ */
+ hash = strchr(sb.buf, '#');
+ if (hash)
+ strbuf_setlen(&sb, hash - sb.buf);
+ strbuf_trim(&sb);
+ if (!sb.len)
+ continue;
+
+ if (parse_oid_hex(sb.buf, &oid, &p) || *p != '\0')
+ die("Invalid SHA-1: %s", sb.buf);
+ oidset_insert(&options->skiplist, &oid);
+ }
+ if (ferror(fp))
+ die_errno("Could not read '%s'", path);
+ fclose(fp);
+ strbuf_release(&sb);
}
static int parse_msg_type(const char *str)
static int object_on_skiplist(struct fsck_options *opts, struct object *obj)
{
- if (opts && opts->skiplist && obj)
- return oid_array_lookup(opts->skiplist, &obj->oid) >= 0;
- return 0;
+ return opts && obj && oidset_contains(&opts->skiplist, &obj->oid);
}
__attribute__((format (printf, 4, 5)))
#ifndef GIT_FSCK_H
#define GIT_FSCK_H
+#include "oidset.h"
+
#define FSCK_ERROR 1
#define FSCK_WARN 2
#define FSCK_IGNORE 3
fsck_error error_func;
unsigned strict:1;
int *msg_type;
- struct oid_array *skiplist;
+ struct oidset skiplist;
struct decoration *object_names;
};
-#define FSCK_OPTIONS_DEFAULT { NULL, fsck_error_function, 0, NULL }
-#define FSCK_OPTIONS_STRICT { NULL, fsck_error_function, 1, NULL }
+#define FSCK_OPTIONS_DEFAULT { NULL, fsck_error_function, 0, NULL, OIDSET_INIT }
+#define FSCK_OPTIONS_STRICT { NULL, fsck_error_function, 1, NULL, OIDSET_INIT }
/* descend in all linked child objects
* the return value is:
--- /dev/null
+#!/bin/sh
+
+test_description='Test fsck performance'
+
+. ./perf-lib.sh
+
+test_perf_large_repo
+
+test_perf 'fsck' '
+ git fsck
+'
+
+test_done
--- /dev/null
+#!/bin/sh
+
+test_description='Test fsck skipList performance'
+
+. ./perf-lib.sh
+
+test_perf_fresh_repo
+
+n=1000000
+
+test_expect_success "setup $n bad commits" '
+ for i in $(test_seq 1 $n)
+ do
+ echo "commit refs/heads/master" &&
+ echo "committer C <c@example.com> 1234567890 +0000" &&
+ echo "data <<EOF" &&
+ echo "$i.Q." &&
+ echo "EOF"
+ done | q_to_nul | git fast-import
+'
+
+skip=0
+while test $skip -le $n
+do
+ test_expect_success "create skipList for $skip bad commits" '
+ git log --format=%H --max-count=$skip |
+ sort >skiplist
+ '
+
+ test_perf "fsck with $skip skipped bad commits" '
+ git -c fsck.skipList=skiplist fsck
+ '
+
+ case $skip in
+ 0) skip=1 ;;
+ *) skip=${skip}0 ;;
+ esac
+done
+
+test_done
This commit object intentionally broken
EOF
+test_expect_success 'setup bogus commit' '
+ commit="$(git hash-object -t commit -w --stdin <bogus-commit)"
+'
+
+test_expect_success 'fsck with no skipList input' '
+ test_must_fail git fsck 2>err &&
+ test_i18ngrep "missingEmail" err
+'
+
+test_expect_success 'setup sorted and unsorted skipLists' '
+ cat >SKIP.unsorted <<-EOF &&
+ 0000000000000000000000000000000000000004
+ 0000000000000000000000000000000000000002
+ $commit
+ 0000000000000000000000000000000000000001
+ 0000000000000000000000000000000000000003
+ EOF
+ sort SKIP.unsorted >SKIP.sorted
+'
+
+test_expect_success 'fsck with sorted skipList' '
+ git -c fsck.skipList=SKIP.sorted fsck
+'
+
+test_expect_success 'fsck with unsorted skipList' '
+ git -c fsck.skipList=SKIP.unsorted fsck
+'
+
test_expect_success 'fsck with invalid or bogus skipList input' '
git -c fsck.skipList=/dev/null -c fsck.missingEmail=ignore fsck &&
test_must_fail git -c fsck.skipList=does-not-exist -c fsck.missingEmail=ignore fsck 2>err &&
test_i18ngrep "Invalid SHA-1: \[core\]" err
'
+test_expect_success 'fsck with other accepted skipList input (comments & empty lines)' '
+ cat >SKIP.with-comment <<-EOF &&
+ # Some bad commit
+ 0000000000000000000000000000000000000001
+ EOF
+ test_must_fail git -c fsck.skipList=SKIP.with-comment fsck 2>err-with-comment &&
+ test_i18ngrep "missingEmail" err-with-comment &&
+ cat >SKIP.with-empty-line <<-EOF &&
+ 0000000000000000000000000000000000000001
+
+ 0000000000000000000000000000000000000002
+ EOF
+ test_must_fail git -c fsck.skipList=SKIP.with-empty-line fsck 2>err-with-empty-line &&
+ test_i18ngrep "missingEmail" err-with-empty-line
+'
+
+test_expect_success 'fsck no garbage output from comments & empty lines errors' '
+ test_line_count = 1 err-with-comment &&
+ test_line_count = 1 err-with-empty-line
+'
+
+test_expect_success 'fsck with invalid abbreviated skipList input' '
+ echo $commit | test_copy_bytes 20 >SKIP.abbreviated &&
+ test_must_fail git -c fsck.skipList=SKIP.abbreviated fsck 2>err-abbreviated &&
+ test_i18ngrep "^fatal: Invalid SHA-1: " err-abbreviated
+'
+
+test_expect_success 'fsck with exhaustive accepted skipList input (various types of comments etc.)' '
+ >SKIP.exhaustive &&
+ echo "# A commented line" >>SKIP.exhaustive &&
+ echo "" >>SKIP.exhaustive &&
+ echo " " >>SKIP.exhaustive &&
+ echo " # Comment after whitespace" >>SKIP.exhaustive &&
+ echo "$commit # Our bad commit (with leading whitespace and trailing comment)" >>SKIP.exhaustive &&
+ echo "# Some bad commit (leading whitespace)" >>SKIP.exhaustive &&
+ echo " 0000000000000000000000000000000000000001" >>SKIP.exhaustive &&
+ git -c fsck.skipList=SKIP.exhaustive fsck 2>err &&
+ test_must_be_empty err
+'
+
test_expect_success 'push with receive.fsck.skipList' '
- commit="$(git hash-object -t commit -w --stdin <bogus-commit)" &&
git push . $commit:refs/heads/bogus &&
rm -rf dst &&
git init dst &&
'
test_expect_success 'fetch with fetch.fsck.skipList' '
- commit="$(git hash-object -t commit -w --stdin <bogus-commit)" &&
refspec=refs/heads/bogus:refs/heads/bogus &&
git push . $commit:refs/heads/bogus &&
rm -rf dst &&
'
test_expect_success 'push with receive.fsck.missingEmail=warn' '
- commit="$(git hash-object -t commit -w --stdin <bogus-commit)" &&
git push . $commit:refs/heads/bogus &&
rm -rf dst &&
git init dst &&
'
test_expect_success 'fetch with fetch.fsck.missingEmail=warn' '
- commit="$(git hash-object -t commit -w --stdin <bogus-commit)" &&
refspec=refs/heads/bogus:refs/heads/bogus &&
git push . $commit:refs/heads/bogus &&
rm -rf dst &&