p0006-read-tree-checkout: perf test to time read-tree
authorJeff Hostetler <jeffhost@microsoft.com>
Wed, 19 Apr 2017 17:06:15 +0000 (17:06 +0000)
committerJunio C Hamano <gitster@pobox.com>
Thu, 20 Apr 2017 03:33:01 +0000 (20:33 -0700)
Created t/perf/repos/many-files.sh to generate large, but
artificial repositories.

Created t/perf/inflate-repo.sh to alter an EXISTING repo
to have a set of large commits. This can be used to create
a branch with 1M+ files in repositories like git.git or
linux.git, but with more realistic content. It does this
by making multiple copies of the entire worktree in a series
of sub-directories.

The branch name and ballast structure created by both scripts
match, so either script can be used to generate very large
test repositories for the following perf test.

Created t/perf/p0006-read-tree-checkout.sh to measure
performance on various read-tree, checkout, and update-index
operations. This test can run using either normal repos or
ones from the above scripts.

Signed-off-by: Jeff Hostetler <jeffhost@microsoft.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
t/perf/p0006-read-tree-checkout.sh [new file with mode: 0755]
t/perf/repos/.gitignore [new file with mode: 0644]
t/perf/repos/inflate-repo.sh [new file with mode: 0755]
t/perf/repos/many-files.sh [new file with mode: 0755]
diff --git a/t/perf/p0006-read-tree-checkout.sh b/t/perf/p0006-read-tree-checkout.sh
new file mode 100755 (executable)
index 0000000..78cc23f
--- /dev/null
@@ -0,0 +1,67 @@
+#!/bin/sh
+#
+# This test measures the performance of various read-tree
+# and checkout operations.  It is primarily interested in
+# the algorithmic costs of index operations and recursive
+# tree traversal -- and NOT disk I/O on thousands of files.
+
+test_description="Tests performance of read-tree"
+
+. ./perf-lib.sh
+
+test_perf_default_repo
+
+# If the test repo was generated by ./repos/many-files.sh
+# then we know something about the data shape and branches,
+# so we can isolate testing to the ballast-related commits
+# and setup sparse-checkout so we don't have to populate
+# the ballast files and directories.
+#
+# Otherwise, we make some general assumptions about the
+# repo and consider the entire history of the current
+# branch to be the ballast.
+
+test_expect_success "setup repo" '
+       if git rev-parse --verify refs/heads/p0006-ballast^{commit}
+       then
+               echo Assuming synthetic repo from many-files.sh
+               git branch br_base            master
+               git branch br_ballast         p0006-ballast^
+               git branch br_ballast_alias   p0006-ballast^
+               git branch br_ballast_plus_1  p0006-ballast
+               git config --local core.sparsecheckout 1
+               cat >.git/info/sparse-checkout <<-EOF
+               /*
+               !ballast/*
+               EOF
+       else
+               echo Assuming non-synthetic repo...
+               git branch br_base            $(git rev-list HEAD | tail -n 1)
+               git branch br_ballast         HEAD^ || error "no ancestor commit from current head"
+               git branch br_ballast_alias   HEAD^
+               git branch br_ballast_plus_1  HEAD
+       fi &&
+       git checkout -q br_ballast &&
+       nr_files=$(git ls-files | wc -l)
+'
+
+test_perf "read-tree br_base br_ballast ($nr_files)" '
+       git read-tree -m br_base br_ballast -n
+'
+
+test_perf "switch between br_base br_ballast ($nr_files)" '
+       git checkout -q br_base &&
+       git checkout -q br_ballast
+'
+
+test_perf "switch between br_ballast br_ballast_plus_1 ($nr_files)" '
+       git checkout -q br_ballast_plus_1 &&
+       git checkout -q br_ballast
+'
+
+test_perf "switch between aliases ($nr_files)" '
+       git checkout -q br_ballast_alias &&
+       git checkout -q br_ballast
+'
+
+test_done
diff --git a/t/perf/repos/.gitignore b/t/perf/repos/.gitignore
new file mode 100644 (file)
index 0000000..72e3dc3
--- /dev/null
@@ -0,0 +1 @@
+gen-*/
diff --git a/t/perf/repos/inflate-repo.sh b/t/perf/repos/inflate-repo.sh
new file mode 100755 (executable)
index 0000000..fcfc992
--- /dev/null
@@ -0,0 +1,85 @@
+#!/bin/sh
+# Inflate the size of an EXISTING repo.
+#
+# This script should be run inside the worktree of a TEST repo.
+# It will use the contents of the current HEAD to generate a
+# commit containing copies of the current worktree such that the
+# total size of the commit has at least <target_size> files.
+#
+# Usage: [-t target_size] [-b branch_name]
+
+set -e
+
+target_size=10000
+branch_name=p0006-ballast
+ballast=ballast
+
+while test "$#" -ne 0
+do
+    case "$1" in
+       -b)
+           shift;
+           test "$#" -ne 0 || { echo 'error: -b requires an argument' >&2; exit 1; }
+           branch_name=$1;
+           shift ;;
+       -t)
+           shift;
+           test "$#" -ne 0 || { echo 'error: -t requires an argument' >&2; exit 1; }
+           target_size=$1;
+           shift ;;
+       *)
+           echo "error: unknown option '$1'" >&2; exit 1 ;;
+    esac
+done
+
+git ls-tree -r HEAD >GEN_src_list
+nr_src_files=$(cat GEN_src_list | wc -l)
+
+src_branch=$(git symbolic-ref --short HEAD)
+
+echo "Branch $src_branch initially has $nr_src_files files."
+
+if test $target_size -le $nr_src_files
+then
+    echo "Repository already exceeds target size $target_size."
+    rm GEN_src_list
+    exit 1
+fi
+
+# Create well-known branch and add 1 file change to start
+# if off before the ballast.
+git checkout -b $branch_name HEAD
+echo "$target_size" > inflate-repo.params
+git add inflate-repo.params
+git commit -q -m params
+
+# Create ballast for in our branch.
+copy=1
+nr_files=$nr_src_files
+while test $nr_files -lt $target_size
+do
+    sed -e "s| |       $ballast/$copy/|" <GEN_src_list |
+       git update-index --index-info
+
+    nr_files=$(expr $nr_files + $nr_src_files)
+    copy=$(expr $copy + 1)
+done
+rm GEN_src_list
+git commit -q -m "ballast"
+
+# Modify 1 file and commit.
+echo "$target_size" >> inflate-repo.params
+git add inflate-repo.params
+git commit -q -m "ballast plus 1"
+
+nr_files=$(git ls-files | wc -l)
+
+# Checkout master to put repo in canonical state (because
+# the perf test may need to clone and enable sparse-checkout
+# before attempting to checkout a commit with the ballast
+# (because it may contain 100K directories and 1M files)).
+git checkout $src_branch
+
+echo "Repository inflated. Branch $branch_name has $nr_files files."
+
+exit 0
diff --git a/t/perf/repos/many-files.sh b/t/perf/repos/many-files.sh
new file mode 100755 (executable)
index 0000000..28720e4
--- /dev/null
@@ -0,0 +1,110 @@
+#!/bin/sh
+# Generate test data repository using the given parameters.
+# When omitted, we create "gen-many-files-d-w-f.git".
+#
+# Usage: [-r repo] [-d depth] [-w width] [-f files]
+#
+# -r repo: path to the new repo to be generated
+# -d depth: the depth of sub-directories
+# -w width: the number of sub-directories at each level
+# -f files: the number of files created in each directory
+#
+# Note that all files will have the same SHA-1 and each
+# directory at a level will have the same SHA-1, so we
+# will potentially have a large index, but not a large
+# ODB.
+#
+# Ballast will be created under "ballast/".
+
+EMPTY_BLOB=e69de29bb2d1d6434b8b29ae775ad8c2e48c5391
+
+set -e
+
+# (5, 10, 9) will create 999,999 ballast files.
+# (4, 10, 9) will create  99,999 ballast files.
+depth=5
+width=10
+files=9
+
+while test "$#" -ne 0
+do
+    case "$1" in
+       -r)
+           shift;
+           test "$#" -ne 0 || { echo 'error: -r requires an argument' >&2; exit 1; }
+           repo=$1;
+           shift ;;
+       -d)
+           shift;
+           test "$#" -ne 0 || { echo 'error: -d requires an argument' >&2; exit 1; }
+           depth=$1;
+           shift ;;
+       -w)
+           shift;
+           test "$#" -ne 0 || { echo 'error: -w requires an argument' >&2; exit 1; }
+           width=$1;
+           shift ;;
+       -f)
+           shift;
+           test "$#" -ne 0 || { echo 'error: -f requires an argument' >&2; exit 1; }
+           files=$1;
+           shift ;;
+       *)
+           echo "error: unknown option '$1'" >&2; exit 1 ;;
+       esac
+done
+
+# Inflate the index with thousands of empty files.
+# usage: dir depth width files
+fill_index() {
+       awk -v arg_dir=$1 -v arg_depth=$2 -v arg_width=$3 -v arg_files=$4 '
+               function make_paths(dir, depth, width, files, f, w) {
+                       for (f = 1; f <= files; f++) {
+                               print dir "/file" f
+                       }
+                       if (depth > 0) {
+                               for (w = 1; w <= width; w++) {
+                                       make_paths(dir "/dir" w, depth - 1, width, files)
+                               }
+                       }
+               }
+               END { make_paths(arg_dir, arg_depth, arg_width, arg_files) }
+               ' </dev/null |
+       sed "s/^/100644 $EMPTY_BLOB     /" |
+       git update-index --index-info
+       return 0
+}
+
+[ -z "$repo" ] && repo=gen-many-files-$depth.$width.$files.git
+
+mkdir $repo
+cd $repo
+git init .
+
+# Create an initial commit just to define master.
+touch many-files.empty
+echo "$depth $width $files" >many-files.params
+git add many-files.*
+git commit -q -m params
+
+# Create ballast for p0006 based upon the given params and
+# inflate the index with thousands of empty files and commit.
+git checkout -b p0006-ballast
+fill_index "ballast" $depth $width $files
+git commit -q -m "ballast"
+
+nr_files=$(git ls-files | wc -l)
+
+# Modify 1 file and commit.
+echo "$depth $width $files" >>many-files.params
+git add many-files.params
+git commit -q -m "ballast plus 1"
+
+# Checkout master to put repo in canonical state (because
+# the perf test may need to clone and enable sparse-checkout
+# before attempting to checkout a commit with the ballast
+# (because it may contain 100K directories and 1M files)).
+git checkout master
+
+echo "Repository "$repo" ($depth, $width, $files) created.  Ballast $nr_files."
+exit 0