t / perf / repos / many-files.shon commit add: introduce "--renormalize" (9472935)
#!/bin/sh
# Generate test data repository using the given parameters.
# When -r is omitted, we create "gen-many-files-<d>.<w>.<f>.git",
# where <d>, <w> and <f> are the depth, width and files values.
#
# Usage: [-r repo] [-d depth] [-w width] [-f files]
#
# -r repo: path to the new repo to be generated
# -d depth: the depth of sub-directories
# -w width: the number of sub-directories at each level
# -f files: the number of files created in each directory
#
# Note that all files will have the same SHA-1 and each
# directory at a level will have the same SHA-1, so we
# will potentially have a large index, but not a large
# ODB.
#
# Ballast will be created under "ballast/".
  18
  19EMPTY_BLOB=e69de29bb2d1d6434b8b29ae775ad8c2e48c5391
  20
  21set -e
  22
  23# (5, 10, 9) will create 999,999 ballast files.
  24# (4, 10, 9) will create  99,999 ballast files.
  25depth=5
  26width=10
  27files=9
  28
  29while test "$#" -ne 0
  30do
  31    case "$1" in
  32        -r)
  33            shift;
  34            test "$#" -ne 0 || { echo 'error: -r requires an argument' >&2; exit 1; }
  35            repo=$1;
  36            shift ;;
  37        -d)
  38            shift;
  39            test "$#" -ne 0 || { echo 'error: -d requires an argument' >&2; exit 1; }
  40            depth=$1;
  41            shift ;;
  42        -w)
  43            shift;
  44            test "$#" -ne 0 || { echo 'error: -w requires an argument' >&2; exit 1; }
  45            width=$1;
  46            shift ;;
  47        -f)
  48            shift;
  49            test "$#" -ne 0 || { echo 'error: -f requires an argument' >&2; exit 1; }
  50            files=$1;
  51            shift ;;
  52        *)
  53            echo "error: unknown option '$1'" >&2; exit 1 ;;
  54        esac
  55done
  56
  57# Inflate the index with thousands of empty files.
  58# usage: dir depth width files
  59fill_index() {
  60        awk -v arg_dir=$1 -v arg_depth=$2 -v arg_width=$3 -v arg_files=$4 '
  61                function make_paths(dir, depth, width, files, f, w) {
  62                        for (f = 1; f <= files; f++) {
  63                                print dir "/file" f
  64                        }
  65                        if (depth > 0) {
  66                                for (w = 1; w <= width; w++) {
  67                                        make_paths(dir "/dir" w, depth - 1, width, files)
  68                                }
  69                        }
  70                }
  71                END { make_paths(arg_dir, arg_depth, arg_width, arg_files) }
  72                ' </dev/null |
  73        sed "s/^/100644 $EMPTY_BLOB     /" |
  74        git update-index --index-info
  75        return 0
  76}
  77
  78[ -z "$repo" ] && repo=gen-many-files-$depth.$width.$files.git
  79
  80mkdir $repo
  81cd $repo
  82git init .
  83
  84# Create an initial commit just to define master.
  85touch many-files.empty
  86echo "$depth $width $files" >many-files.params
  87git add many-files.*
  88git commit -q -m params
  89
  90# Create ballast for p0006 based upon the given params and
  91# inflate the index with thousands of empty files and commit.
  92git checkout -b p0006-ballast
  93fill_index "ballast" $depth $width $files
  94git commit -q -m "ballast"
  95
  96nr_files=$(git ls-files | wc -l)
  97
  98# Modify 1 file and commit.
  99echo "$depth $width $files" >>many-files.params
 100git add many-files.params
 101git commit -q -m "ballast plus 1"
 102
 103# Checkout master to put repo in canonical state (because
 104# the perf test may need to clone and enable sparse-checkout
 105# before attempting to checkout a commit with the ballast
 106# (because it may contain 100K directories and 1M files)).
 107git checkout master
 108
 109echo "Repository "$repo" ($depth, $width, $files) created.  Ballast $nr_files."
 110exit 0