Merge branch 'ew/fast-import-unpack-limit'
authorJunio C Hamano <gitster@pobox.com>
Mon, 20 Jun 2016 18:01:00 +0000 (11:01 -0700)
committerJunio C Hamano <gitster@pobox.com>
Mon, 20 Jun 2016 18:01:00 +0000 (11:01 -0700)
"git fast-import" learned the same performance trick to avoid
creating too small a packfile as "git fetch" and "git push" have,
using *.unpackLimit configuration.

* ew/fast-import-unpack-limit:
fast-import: invalidate pack_id references after loosening
fast-import: implement unpack limit

Documentation/config.txt
Documentation/git-fast-import.txt
fast-import.c
t/t9300-fast-import.sh
t/t9302-fast-import-unpack-limit.sh [new file with mode: 0755]
index 2e1b2e486e615981c14f7e591f3f32aa06cdee7a..58673cf21e2a0c2314aea71f16b651e3f7409d56 100644 (file)
@@ -1189,6 +1189,15 @@ difftool.<tool>.cmd::
 difftool.prompt::
        Prompt before each invocation of the diff tool.
 
+fastimport.unpackLimit::
+       If the number of objects imported by linkgit:git-fast-import[1]
+       is at or below this limit, then the objects will be unpacked
+       into loose object files.  However if the number of imported
+       objects exceeds this limit then the pack will be stored as a
+       pack.  Storing the pack from a fast-import can make the import
+       operation complete faster, especially on slow filesystems.  If
+       not set, the value of `transfer.unpackLimit` is used instead.
+
 fetch.recurseSubmodules::
        This option can be either set to a boolean value or to 'on-demand'.
        Setting it to a boolean changes the behavior of fetch and pull to
index 66910aa2faff1d78b71c5c552550a4f8e9d51832..644df993f924d3265db10af6e6c29e33daf95482 100644 (file)
@@ -136,6 +136,8 @@ Performance and Compression Tuning
        Maximum size of each output packfile.
        The default is unlimited.
 
+fastimport.unpackLimit::
+       See linkgit:git-config[1]
 
 Performance
 -----------
index c504ef752db124e21156be5b92360dbe432e568f..59630cee1488bda274bd4f4bd8bf2748d9ab081a 100644 (file)
@@ -166,6 +166,7 @@ Format of STDIN stream:
 #include "quote.h"
 #include "exec_cmd.h"
 #include "dir.h"
+#include "run-command.h"
 
 #define PACK_ID_BITS 16
 #define MAX_PACK_ID ((1<<PACK_ID_BITS)-1)
@@ -282,6 +283,7 @@ struct recent_command {
 /* Configured limits on output */
 static unsigned long max_depth = 10;
 static off_t max_packsize;
+static int unpack_limit = 100;
 static int force_update;
 static int pack_compression_level = Z_DEFAULT_COMPRESSION;
 static int pack_compression_seen;
@@ -596,6 +598,33 @@ static struct object_entry *insert_object(unsigned char *sha1)
        return e;
 }
 
+static void invalidate_pack_id(unsigned int id)
+{
+       unsigned int h;
+       unsigned long lu;
+       struct tag *t;
+
+       for (h = 0; h < ARRAY_SIZE(object_table); h++) {
+               struct object_entry *e;
+
+               for (e = object_table[h]; e; e = e->next)
+                       if (e->pack_id == id)
+                               e->pack_id = MAX_PACK_ID;
+       }
+
+       for (lu = 0; lu < branch_table_sz; lu++) {
+               struct branch *b;
+
+               for (b = branch_table[lu]; b; b = b->table_next_branch)
+                       if (b->pack_id == id)
+                               b->pack_id = MAX_PACK_ID;
+       }
+
+       for (t = first_tag; t; t = t->next_tag)
+               if (t->pack_id == id)
+                       t->pack_id = MAX_PACK_ID;
+}
+
 static unsigned int hc_str(const char *s, size_t len)
 {
        unsigned int r = 0;
@@ -951,6 +980,23 @@ static void unkeep_all_packs(void)
        }
 }
 
+static int loosen_small_pack(const struct packed_git *p)
+{
+       struct child_process unpack = CHILD_PROCESS_INIT;
+
+       if (lseek(p->pack_fd, 0, SEEK_SET) < 0)
+               die_errno("Failed seeking to start of '%s'", p->pack_name);
+
+       unpack.in = p->pack_fd;
+       unpack.git_cmd = 1;
+       unpack.stdout_to_stderr = 1;
+       argv_array_push(&unpack.args, "unpack-objects");
+       if (!show_stats)
+               argv_array_push(&unpack.args, "-q");
+
+       return run_command(&unpack);
+}
+
 static void end_packfile(void)
 {
        static int running;
@@ -973,6 +1019,14 @@ static void end_packfile(void)
                fixup_pack_header_footer(pack_data->pack_fd, pack_data->sha1,
                                    pack_data->pack_name, object_count,
                                    cur_pack_sha1, pack_size);
+
+               if (object_count <= unpack_limit) {
+                       if (!loosen_small_pack(pack_data)) {
+                               invalidate_pack_id(pack_id);
+                               goto discard_pack;
+                       }
+               }
+
                close(pack_data->pack_fd);
                idx_name = keep_pack(create_index());
 
@@ -1003,6 +1057,7 @@ static void end_packfile(void)
                pack_id++;
        }
        else {
+discard_pack:
                close(pack_data->pack_fd);
                unlink_or_warn(pack_data->pack_name);
        }
@@ -3320,6 +3375,7 @@ static void parse_option(const char *option)
 static void git_pack_config(void)
 {
        int indexversion_value;
+       int limit;
        unsigned long packsizelimit_value;
 
        if (!git_config_get_ulong("pack.depth", &max_depth)) {
@@ -3344,6 +3400,11 @@ static void git_pack_config(void)
        if (!git_config_get_ulong("pack.packsizelimit", &packsizelimit_value))
                max_packsize = packsizelimit_value;
 
+       if (!git_config_get_int("fastimport.unpacklimit", &limit))
+               unpack_limit = limit;
+       else if (!git_config_get_int("transfer.unpacklimit", &limit))
+               unpack_limit = limit;
+
        git_config(git_default_config, NULL);
 }
 
index 4bca35c2594bbff4a659e1d6d1dcc5e746956a84..74d740de41bbd489dd0ce9fb811f9ea1c08b248c 100755 (executable)
@@ -52,6 +52,7 @@ echo "$@"'
 ###
 
 test_expect_success 'empty stream succeeds' '
+       git config fastimport.unpackLimit 0 &&
        git fast-import </dev/null
 '
 
@@ -2690,6 +2691,7 @@ test_expect_success 'R: blob bigger than threshold' '
        echo >>input &&
 
        test_create_repo R &&
+       git --git-dir=R/.git config fastimport.unpackLimit 0 &&
        git --git-dir=R/.git fast-import --big-file-threshold=1 <input
 '
 
diff --git a/t/t9302-fast-import-unpack-limit.sh b/t/t9302-fast-import-unpack-limit.sh
new file mode 100755 (executable)
index 0000000..a04de14
--- /dev/null
@@ -0,0 +1,105 @@
+#!/bin/sh
+test_description='test git fast-import unpack limit'
+. ./test-lib.sh
+
+test_expect_success 'create loose objects on import' '
+       test_tick &&
+       cat >input <<-INPUT_END &&
+       commit refs/heads/master
+       committer $GIT_COMMITTER_NAME <$GIT_COMMITTER_EMAIL> $GIT_COMMITTER_DATE
+       data <<COMMIT
+       initial
+       COMMIT
+
+       done
+       INPUT_END
+
+       git -c fastimport.unpackLimit=2 fast-import --done <input &&
+       git fsck --no-progress &&
+       test $(find .git/objects/?? -type f | wc -l) -eq 2 &&
+       test $(find .git/objects/pack -type f | wc -l) -eq 0
+'
+
+test_expect_success 'bigger packs are preserved' '
+       test_tick &&
+       cat >input <<-INPUT_END &&
+       commit refs/heads/master
+       committer $GIT_COMMITTER_NAME <$GIT_COMMITTER_EMAIL> $GIT_COMMITTER_DATE
+       data <<COMMIT
+       incremental should create a pack
+       COMMIT
+       from refs/heads/master^0
+
+       commit refs/heads/branch
+       committer $GIT_COMMITTER_NAME <$GIT_COMMITTER_EMAIL> $GIT_COMMITTER_DATE
+       data <<COMMIT
+       branch
+       COMMIT
+
+       done
+       INPUT_END
+
+       git -c fastimport.unpackLimit=2 fast-import --done <input &&
+       git fsck --no-progress &&
+       test $(find .git/objects/?? -type f | wc -l) -eq 2 &&
+       test $(find .git/objects/pack -type f | wc -l) -eq 2
+'
+
+test_expect_success 'lookups after checkpoint works' '
+       hello_id=$(echo hello | git hash-object --stdin -t blob) &&
+       id="$GIT_COMMITTER_NAME <$GIT_COMMITTER_EMAIL> $GIT_COMMITTER_DATE" &&
+       before=$(git rev-parse refs/heads/master^0) &&
+       (
+               cat <<-INPUT_END &&
+               blob
+               mark :1
+               data 6
+               hello
+
+               commit refs/heads/master
+               mark :2
+               committer $id
+               data <<COMMIT
+               checkpoint after this
+               COMMIT
+               from refs/heads/master^0
+               M 100644 :1 hello
+
+               # pre-checkpoint
+               cat-blob :1
+               cat-blob $hello_id
+               checkpoint
+               # post-checkpoint
+               cat-blob :1
+               cat-blob $hello_id
+               INPUT_END
+
+               n=0 &&
+               from=$before &&
+               while test x"$from" = x"$before"
+               do
+                       if test $n -gt 30
+                       then
+                               echo >&2 "checkpoint did not update branch"
+                               exit 1
+                       else
+                               n=$(($n + 1))
+                       fi &&
+                       sleep 1 &&
+                       from=$(git rev-parse refs/heads/master^0)
+               done &&
+               cat <<-INPUT_END &&
+               commit refs/heads/master
+               committer $id
+               data <<COMMIT
+               make sure from "unpacked sha1 reference" works, too
+               COMMIT
+               from $from
+               INPUT_END
+               echo done
+       ) | git -c fastimport.unpackLimit=100 fast-import --done &&
+       test $(find .git/objects/?? -type f | wc -l) -eq 6 &&
+       test $(find .git/objects/pack -type f | wc -l) -eq 2
+'
+
+test_done