Merge branch 'jt/repack-promisor-packs'
author Junio C Hamano <gitster@pobox.com>
Mon, 20 Aug 2018 18:33:55 +0000 (11:33 -0700)
committer Junio C Hamano <gitster@pobox.com>
Mon, 20 Aug 2018 19:40:31 +0000 (12:40 -0700)
After a partial clone, repeated fetches from a promisor remote would
have accumulated many packfiles marked with the .promisor bit without
getting them coalesced into fewer packfiles, hurting performance.
"git repack" has now learned to repack them.

* jt/repack-promisor-packs:
repack: repack promisor objects if -a or -A is set
repack: refactor setup of pack-objects cmd

1  2 
builtin/repack.c
diff --combined builtin/repack.c
index 6c636e159eaf2d67d617c459aceddd7423e326ab,5c97dec3dbb7691436f19d0772313cadaf7c018d..d5886039cc6656609962fd522a27f61eda6cd0ec
@@@ -8,6 -8,7 +8,8 @@@
  #include "strbuf.h"
  #include "string-list.h"
  #include "argv-array.h"
+ #include "packfile.h"
++#include "object-store.h"
  
  static int delta_base_offset = 1;
  static int pack_kept_objects = -1;
@@@ -83,7 -84,7 +85,7 @@@ static void remove_pack_on_signal(int s
  
  /*
   * Adds all packs hex strings to the fname list, which do not
-  * have a corresponding .keep or .promisor file. These packs are not to
+  * have a corresponding .keep file. These packs are not to
   * be kept if we are going to pack everything into one file.
   */
  static void get_non_kept_pack_filenames(struct string_list *fname_list,
  
                fname = xmemdupz(e->d_name, len);
  
-               if (!file_exists(mkpath("%s/%s.keep", packdir, fname)) &&
-                   !file_exists(mkpath("%s/%s.promisor", packdir, fname)))
+               if (!file_exists(mkpath("%s/%s.keep", packdir, fname)))
                        string_list_append_nodup(fname_list, fname);
                else
                        free(fname);
  
  static void remove_redundant_pack(const char *dir_name, const char *base_name)
  {
-       const char *exts[] = {".pack", ".idx", ".keep", ".bitmap"};
+       const char *exts[] = {".pack", ".idx", ".keep", ".bitmap", ".promisor"};
        int i;
        struct strbuf buf = STRBUF_INIT;
        size_t plen;
        strbuf_release(&buf);
  }
  
+ struct pack_objects_args {
+       const char *window;
+       const char *window_memory;
+       const char *depth;
+       const char *threads;
+       const char *max_pack_size;
+       int no_reuse_delta;
+       int no_reuse_object;
+       int quiet;
+       int local;
+ };
+ static void prepare_pack_objects(struct child_process *cmd,
+                                const struct pack_objects_args *args)
+ {
+       argv_array_push(&cmd->args, "pack-objects");
+       if (args->window)
+               argv_array_pushf(&cmd->args, "--window=%s", args->window);
+       if (args->window_memory)
+               argv_array_pushf(&cmd->args, "--window-memory=%s", args->window_memory);
+       if (args->depth)
+               argv_array_pushf(&cmd->args, "--depth=%s", args->depth);
+       if (args->threads)
+               argv_array_pushf(&cmd->args, "--threads=%s", args->threads);
+       if (args->max_pack_size)
+               argv_array_pushf(&cmd->args, "--max-pack-size=%s", args->max_pack_size);
+       if (args->no_reuse_delta)
+               argv_array_pushf(&cmd->args, "--no-reuse-delta");
+       if (args->no_reuse_object)
+               argv_array_pushf(&cmd->args, "--no-reuse-object");
+       if (args->local)
+               argv_array_push(&cmd->args,  "--local");
+       if (args->quiet)
+               argv_array_push(&cmd->args,  "--quiet");
+       if (delta_base_offset)
+               argv_array_push(&cmd->args,  "--delta-base-offset");
+       argv_array_push(&cmd->args, packtmp);
+       cmd->git_cmd = 1;
+       cmd->out = -1;
+ }
+ /*
+  * Write oid to the given struct child_process's stdin, starting it first if
+  * necessary.
+  */
+ static int write_oid(const struct object_id *oid, struct packed_git *pack,
+                    uint32_t pos, void *data)
+ {
+       struct child_process *cmd = data;
+       if (cmd->in == -1) {
+               if (start_command(cmd))
+                       die("Could not start pack-objects to repack promisor objects");
+       }
+       xwrite(cmd->in, oid_to_hex(oid), GIT_SHA1_HEXSZ);
+       xwrite(cmd->in, "\n", 1);
+       return 0;
+ }
+ static void repack_promisor_objects(const struct pack_objects_args *args,
+                                   struct string_list *names)
+ {
+       struct child_process cmd = CHILD_PROCESS_INIT;
+       FILE *out;
+       struct strbuf line = STRBUF_INIT;
+       prepare_pack_objects(&cmd, args);
+       cmd.in = -1;
+       /*
+        * NEEDSWORK: Giving pack-objects only the OIDs without any ordering
+        * hints may result in suboptimal deltas in the resulting pack. See if
+        * the OIDs can be sent with fake paths such that pack-objects can use a
+        * {type -> existing pack order} ordering when computing deltas instead
+        * of a {type -> size} ordering, which may produce better deltas.
+        */
+       for_each_packed_object(write_oid, &cmd,
+                              FOR_EACH_OBJECT_PROMISOR_ONLY);
+       if (cmd.in == -1)
+               /* No packed objects; cmd was never started */
+               return;
+       close(cmd.in);
+       out = xfdopen(cmd.out, "r");
+       while (strbuf_getline_lf(&line, out) != EOF) {
+               char *promisor_name;
+               int fd;
+               if (line.len != 40)
+                       die("repack: Expecting 40 character sha1 lines only from pack-objects.");
+               string_list_append(names, line.buf);
+               /*
+                * pack-objects creates the .pack and .idx files, but not the
+                * .promisor file. Create the .promisor file, which is empty.
+                */
+               promisor_name = mkpathdup("%s-%s.promisor", packtmp,
+                                         line.buf);
+               fd = open(promisor_name, O_CREAT|O_EXCL|O_WRONLY, 0600);
+               if (fd < 0)
+                       die_errno("unable to create '%s'", promisor_name);
+               close(fd);
+               free(promisor_name);
+       }
+       fclose(out);
+       if (finish_command(&cmd))
+               die("Could not finish pack-objects to repack promisor objects");
+ }
  #define ALL_INTO_ONE 1
  #define LOOSEN_UNREACHABLE 2
  
@@@ -150,6 -261,7 +262,7 @@@ int cmd_repack(int argc, const char **a
                {".pack"},
                {".idx"},
                {".bitmap", 1},
+               {".promisor", 1},
        };
        struct child_process cmd = CHILD_PROCESS_INIT;
        struct string_list_item *item;
        int delete_redundant = 0;
        const char *unpack_unreachable = NULL;
        int keep_unreachable = 0;
-       const char *window = NULL, *window_memory = NULL;
-       const char *depth = NULL;
-       const char *threads = NULL;
-       const char *max_pack_size = NULL;
        struct string_list keep_pack_list = STRING_LIST_INIT_NODUP;
-       int no_reuse_delta = 0, no_reuse_object = 0;
        int no_update_server_info = 0;
-       int quiet = 0;
-       int local = 0;
+       struct pack_objects_args po_args = {NULL};
  
        struct option builtin_repack_options[] = {
                OPT_BIT('a', NULL, &pack_everything,
                                   LOOSEN_UNREACHABLE | ALL_INTO_ONE),
                OPT_BOOL('d', NULL, &delete_redundant,
                                N_("remove redundant packs, and run git-prune-packed")),
-               OPT_BOOL('f', NULL, &no_reuse_delta,
+               OPT_BOOL('f', NULL, &po_args.no_reuse_delta,
                                N_("pass --no-reuse-delta to git-pack-objects")),
-               OPT_BOOL('F', NULL, &no_reuse_object,
+               OPT_BOOL('F', NULL, &po_args.no_reuse_object,
                                N_("pass --no-reuse-object to git-pack-objects")),
                OPT_BOOL('n', NULL, &no_update_server_info,
                                N_("do not run git-update-server-info")),
-               OPT__QUIET(&quiet, N_("be quiet")),
-               OPT_BOOL('l', "local", &local,
+               OPT__QUIET(&po_args.quiet, N_("be quiet")),
+               OPT_BOOL('l', "local", &po_args.local,
                                N_("pass --local to git-pack-objects")),
                OPT_BOOL('b', "write-bitmap-index", &write_bitmaps,
                                N_("write bitmap index")),
                                N_("with -A, do not loosen objects older than this")),
                OPT_BOOL('k', "keep-unreachable", &keep_unreachable,
                                N_("with -a, repack unreachable objects")),
-               OPT_STRING(0, "window", &window, N_("n"),
+               OPT_STRING(0, "window", &po_args.window, N_("n"),
                                N_("size of the window used for delta compression")),
-               OPT_STRING(0, "window-memory", &window_memory, N_("bytes"),
+               OPT_STRING(0, "window-memory", &po_args.window_memory, N_("bytes"),
                                N_("same as the above, but limit memory size instead of entries count")),
-               OPT_STRING(0, "depth", &depth, N_("n"),
+               OPT_STRING(0, "depth", &po_args.depth, N_("n"),
                                N_("limits the maximum delta depth")),
-               OPT_STRING(0, "threads", &threads, N_("n"),
+               OPT_STRING(0, "threads", &po_args.threads, N_("n"),
                                N_("limits the maximum number of threads")),
-               OPT_STRING(0, "max-pack-size", &max_pack_size, N_("bytes"),
+               OPT_STRING(0, "max-pack-size", &po_args.max_pack_size, N_("bytes"),
                                N_("maximum size of each packfile")),
                OPT_BOOL(0, "pack-kept-objects", &pack_kept_objects,
                                N_("repack objects in packs marked with .keep")),
  
        sigchain_push_common(remove_pack_on_signal);
  
-       argv_array_push(&cmd.args, "pack-objects");
+       prepare_pack_objects(&cmd, &po_args);
        argv_array_push(&cmd.args, "--keep-true-parents");
        if (!pack_kept_objects)
                argv_array_push(&cmd.args, "--honor-pack-keep");
        argv_array_push(&cmd.args, "--indexed-objects");
        if (repository_format_partial_clone)
                argv_array_push(&cmd.args, "--exclude-promisor-objects");
-       if (window)
-               argv_array_pushf(&cmd.args, "--window=%s", window);
-       if (window_memory)
-               argv_array_pushf(&cmd.args, "--window-memory=%s", window_memory);
-       if (depth)
-               argv_array_pushf(&cmd.args, "--depth=%s", depth);
-       if (threads)
-               argv_array_pushf(&cmd.args, "--threads=%s", threads);
-       if (max_pack_size)
-               argv_array_pushf(&cmd.args, "--max-pack-size=%s", max_pack_size);
-       if (no_reuse_delta)
-               argv_array_pushf(&cmd.args, "--no-reuse-delta");
-       if (no_reuse_object)
-               argv_array_pushf(&cmd.args, "--no-reuse-object");
        if (write_bitmaps)
                argv_array_push(&cmd.args, "--write-bitmap-index");
  
        if (pack_everything & ALL_INTO_ONE) {
                get_non_kept_pack_filenames(&existing_packs, &keep_pack_list);
  
+               repack_promisor_objects(&po_args, &names);
                if (existing_packs.nr && delete_redundant) {
                        if (unpack_unreachable) {
                                argv_array_pushf(&cmd.args,
                argv_array_push(&cmd.args, "--incremental");
        }
  
-       if (local)
-               argv_array_push(&cmd.args,  "--local");
-       if (quiet)
-               argv_array_push(&cmd.args,  "--quiet");
-       if (delta_base_offset)
-               argv_array_push(&cmd.args,  "--delta-base-offset");
-       argv_array_push(&cmd.args, packtmp);
-       cmd.git_cmd = 1;
-       cmd.out = -1;
        cmd.no_stdin = 1;
  
        ret = start_command(&cmd);
        if (ret)
                return ret;
  
-       if (!names.nr && !quiet)
+       if (!names.nr && !po_args.quiet)
                printf("Nothing new to pack.\n");
  
        /*
  
        /* End of pack replacement. */
  
+       reprepare_packed_git(the_repository);
        if (delete_redundant) {
                int opts = 0;
                string_list_sort(&names);
                        if (!string_list_has_string(&names, sha1))
                                remove_redundant_pack(packdir, item->string);
                }
-               if (!quiet && isatty(2))
+               if (!po_args.quiet && isatty(2))
                        opts |= PRUNE_PACKED_VERBOSE;
                prune_packed_objects(opts);
        }