Merge branch 'jk/pack-objects-optim'
author Junio C Hamano <gitster@pobox.com>
Mon, 8 Aug 2016 21:48:39 +0000 (14:48 -0700)
committer Junio C Hamano <gitster@pobox.com>
Mon, 8 Aug 2016 21:48:39 +0000 (14:48 -0700)
"git pack-objects" has a few options that tell it not to pack
objects found in certain packfiles, which require it to scan .idx
files of all available packs. The codepaths involved in these
operations have been optimized for a common case of not having any
non-local pack and/or any .kept pack.

* jk/pack-objects-optim:
pack-objects: compute local/ignore_pack_keep early
pack-objects: break out of want_object loop early
find_pack_entry: replace last_found_pack with MRU cache
add generic most-recently-used list
sha1_file: drop free_pack_by_name
t/perf: add tests for many-pack scenarios

1  2 
Makefile
builtin/pack-objects.c
cache.h
sha1_file.c
diff --combined Makefile
index 6a13386c27336b0f3054389674872a2c842f3290,3b82ce00700a5d5fbaf8ca27c0411f7431f3e9a9..ad3624d95ba337e595874b04bf247d8b3be1d183
+++ b/Makefile
@@@ -351,12 -351,9 +351,12 @@@ all:
  # Define GMTIME_UNRELIABLE_ERRORS if your gmtime() function does not
  # return NULL when it receives a bogus time_t.
  #
 -# Define HAVE_CLOCK_GETTIME if your platform has clock_gettime in librt.
 +# Define HAVE_CLOCK_GETTIME if your platform has clock_gettime.
  #
 -# Define HAVE_CLOCK_MONOTONIC if your platform has CLOCK_MONOTONIC in librt.
 +# Define HAVE_CLOCK_MONOTONIC if your platform has CLOCK_MONOTONIC.
 +#
 +# Define NEEDS_LIBRT if your platform requires linking with librt (glibc version
 +# before 2.17) for clock_gettime and CLOCK_MONOTONIC.
  #
  # Define USE_PARENS_AROUND_GETTEXT_N to "yes" if your compiler happily
  # compiles the following initialization:
@@@ -721,7 -718,6 +721,7 @@@ LIB_OBJS += diff-lib.
  LIB_OBJS += diff-no-index.o
  LIB_OBJS += diff.o
  LIB_OBJS += dir.o
 +LIB_OBJS += dir-iterator.o
  LIB_OBJS += editor.o
  LIB_OBJS += entry.o
  LIB_OBJS += environment.o
@@@ -755,6 -751,7 +755,7 @@@ LIB_OBJS += merge.
  LIB_OBJS += merge-blobs.o
  LIB_OBJS += merge-recursive.o
  LIB_OBJS += mergesort.o
+ LIB_OBJS += mru.o
  LIB_OBJS += name-hash.o
  LIB_OBJS += notes.o
  LIB_OBJS += notes-cache.o
@@@ -786,7 -783,6 +787,7 @@@ LIB_OBJS += read-cache.
  LIB_OBJS += reflog-walk.o
  LIB_OBJS += refs.o
  LIB_OBJS += refs/files-backend.o
 +LIB_OBJS += refs/iterator.o
  LIB_OBJS += ref-filter.o
  LIB_OBJS += remote.o
  LIB_OBJS += replace_object.o
@@@ -1470,16 -1466,13 +1471,16 @@@ endi
  
  ifdef HAVE_CLOCK_GETTIME
        BASIC_CFLAGS += -DHAVE_CLOCK_GETTIME
 -      EXTLIBS += -lrt
  endif
  
  ifdef HAVE_CLOCK_MONOTONIC
        BASIC_CFLAGS += -DHAVE_CLOCK_MONOTONIC
  endif
  
 +ifdef NEEDS_LIBRT
 +      EXTLIBS += -lrt
 +endif
 +
  ifdef HAVE_BSD_SYSCTL
        BASIC_CFLAGS += -DHAVE_BSD_SYSCTL
  endif
@@@ -2245,9 -2238,17 +2246,9 @@@ perf: al
  
  .PHONY: test perf
  
 -t/helper/test-ctype$X: ctype.o
 -
 -t/helper/test-date$X: date.o ctype.o
 -
 -t/helper/test-delta$X: diff-delta.o patch-delta.o
 -
 -t/helper/test-line-buffer$X: vcs-svn/lib.a
 -
 -t/helper/test-parse-options$X: parse-options.o parse-options-cb.o
 +t/helper/test-line-buffer$X: $(VCSSVN_LIB)
  
 -t/helper/test-svn-fe$X: vcs-svn/lib.a
 +t/helper/test-svn-fe$X: $(VCSSVN_LIB)
  
  .PRECIOUS: $(TEST_OBJS)
  
diff --combined builtin/pack-objects.c
index 92e2e5f7a8190c546f2c0cc9bc9724a04d330052,c4c2a3c79d31099ef411cf155e9744948df60c7c..4a63398960c42d91821c4278a4cfbd11c1f1a7a4
@@@ -46,6 -46,7 +46,7 @@@ static int keep_unreachable, unpack_unr
  static unsigned long unpack_unreachable_expiration;
  static int pack_loose_unreachable;
  static int local;
+ static int have_non_local_packs;
  static int incremental;
  static int ignore_packed_keep;
  static int allow_ofs_delta;
@@@ -342,15 -343,15 +343,15 @@@ static unsigned long write_no_reuse_obj
  }
  
  /* Return 0 if we will bust the pack-size limit */
 -static unsigned long write_reuse_object(struct sha1file *f, struct object_entry *entry,
 -                                      unsigned long limit, int usable_delta)
 +static off_t write_reuse_object(struct sha1file *f, struct object_entry *entry,
 +                              unsigned long limit, int usable_delta)
  {
        struct packed_git *p = entry->in_pack;
        struct pack_window *w_curs = NULL;
        struct revindex_entry *revidx;
        off_t offset;
        enum object_type type = entry->type;
 -      unsigned long datalen;
 +      off_t datalen;
        unsigned char header[10], dheader[10];
        unsigned hdrlen;
  
  }
  
  /* Return 0 if we will bust the pack-size limit */
 -static unsigned long write_object(struct sha1file *f,
 -                                struct object_entry *entry,
 -                                off_t write_offset)
 +static off_t write_object(struct sha1file *f,
 +                        struct object_entry *entry,
 +                        off_t write_offset)
  {
 -      unsigned long limit, len;
 +      unsigned long limit;
 +      off_t len;
        int usable_delta, to_reuse;
  
        if (!pack_to_stdout)
@@@ -493,7 -493,7 +494,7 @@@ static enum write_one_status write_one(
                                       struct object_entry *e,
                                       off_t *offset)
  {
 -      unsigned long size;
 +      off_t size;
        int recursing;
  
        /*
@@@ -978,6 -978,23 +979,23 @@@ static int want_object_in_pack(const un
                                return 1;
                        if (incremental)
                                return 0;
+                       /*
+                        * When asked to do --local (do not include an
+                        * object that appears in a pack we borrow
+                        * from elsewhere) or --honor-pack-keep (do not
+                        * include an object that appears in a pack marked
+                        * with .keep), we need to make sure no copy of this
+                        * object comes from _any_ pack that causes us to
+                        * omit it, and need to complete this loop.  When
+                        * neither option is in effect, we know the object
+                        * we just found is going to be packed, so break
+                        * out of the loop to return 1 now.
+                        */
+                       if (!ignore_packed_keep &&
+                           (!local || !have_non_local_packs))
+                               break;
                        if (local && !p->pack_local)
                                return 0;
                        if (ignore_packed_keep && p->pack_local && p->pack_keep)
@@@ -2784,6 -2801,28 +2802,28 @@@ int cmd_pack_objects(int argc, const ch
                progress = 2;
  
        prepare_packed_git();
+       if (ignore_packed_keep) {
+               struct packed_git *p;
+               for (p = packed_git; p; p = p->next)
+                       if (p->pack_local && p->pack_keep)
+                               break;
+               if (!p) /* no keep-able packs found */
+                       ignore_packed_keep = 0;
+       }
+       if (local) {
+               /*
+                * unlike ignore_packed_keep above, we do not want to
+                * unset "local" based on looking at packs, as it
+                * also covers non-local objects
+                */
+               struct packed_git *p;
+               for (p = packed_git; p; p = p->next) {
+                       if (!p->pack_local) {
+                               have_non_local_packs = 1;
+                               break;
+                       }
+               }
+       }
  
        if (progress)
                progress_state = start_progress(_("Counting objects"), 0);
diff --combined cache.h
index e8128fc5d60579f461d60897fdd6483b3653d75c,40671d160a0a31a475d7f317966798d5dea248b7..7c8051be0ad155e7b340019b7dc5b12be600f79b
+++ b/cache.h
@@@ -632,7 -632,6 +632,7 @@@ extern void fill_stat_cache_info(struc
  #define REFRESH_IGNORE_SUBMODULES     0x0010  /* ignore submodules */
  #define REFRESH_IN_PORCELAIN  0x0020  /* user friendly output, not "needs update" */
  extern int refresh_index(struct index_state *, unsigned int flags, const struct pathspec *pathspec, char *seen, const char *header_msg);
 +extern struct cache_entry *refresh_cache_entry(struct cache_entry *, unsigned int);
  
  extern void update_index_if_able(struct index_state *, struct lock_file *);
  
@@@ -1004,11 -1003,6 +1004,11 @@@ int adjust_shared_perm(const char *path
   * directory while we were working.  To be robust against this kind of
   * race, callers might want to try invoking the function again when it
   * returns SCLD_VANISHED.
 + *
 + * safe_create_leading_directories() temporarily changes path while it
 + * is working but restores it before returning.
 + * safe_create_leading_directories_const() doesn't modify path, even
 + * temporarily.
   */
  enum scld_error {
        SCLD_OK = 0,
@@@ -1230,8 -1224,7 +1230,8 @@@ struct date_mode 
                DATE_ISO8601_STRICT,
                DATE_RFC2822,
                DATE_STRFTIME,
 -              DATE_RAW
 +              DATE_RAW,
 +              DATE_UNIX
        } type;
        const char *strftime_fmt;
        int local;
@@@ -1378,6 -1371,13 +1378,13 @@@ extern struct packed_git 
        char pack_name[FLEX_ARRAY]; /* more */
  } *packed_git;
  
+ /*
+  * A most-recently-used ordered version of the packed_git list, which can
+  * be iterated instead of packed_git (and marked via mru_mark).
+  */
+ struct mru;
+ extern struct mru *packed_git_mru;
  struct pack_entry {
        off_t offset;
        unsigned char sha1[20];
@@@ -1417,7 -1417,6 +1424,6 @@@ extern unsigned char *use_pack(struct p
  extern void close_pack_windows(struct packed_git *);
  extern void close_all_packs(void);
  extern void unuse_pack(struct pack_window **);
- extern void free_pack_by_name(const char *);
  extern void clear_delta_base_cache(void);
  extern struct packed_git *add_packed_git(const char *path, size_t path_len, int local);
  
@@@ -1516,7 -1515,7 +1522,7 @@@ struct object_info 
        /* Request */
        enum object_type *typep;
        unsigned long *sizep;
 -      unsigned long *disk_sizep;
 +      off_t *disk_sizep;
        unsigned char *delta_base_sha1;
        struct strbuf *typename;
  
@@@ -1567,18 -1566,10 +1573,18 @@@ struct git_config_source 
        const char *blob;
  };
  
 +enum config_origin_type {
 +      CONFIG_ORIGIN_BLOB,
 +      CONFIG_ORIGIN_FILE,
 +      CONFIG_ORIGIN_STDIN,
 +      CONFIG_ORIGIN_SUBMODULE_BLOB,
 +      CONFIG_ORIGIN_CMDLINE
 +};
 +
  typedef int (*config_fn_t)(const char *, const char *, void *);
  extern int git_default_config(const char *, const char *, void *);
  extern int git_config_from_file(config_fn_t fn, const char *, void *);
 -extern int git_config_from_mem(config_fn_t fn, const char *origin_type,
 +extern int git_config_from_mem(config_fn_t fn, const enum config_origin_type,
                                        const char *name, const char *buf, size_t len, void *data);
  extern void git_config_push_parameter(const char *text);
  extern int git_config_from_parameters(config_fn_t fn, void *data);
@@@ -1722,7 -1713,7 +1728,7 @@@ extern int ignore_untracked_cache_confi
  struct key_value_info {
        const char *filename;
        int linenr;
 -      const char *origin_type;
 +      enum config_origin_type origin_type;
        enum config_scope scope;
  };
  
diff --combined sha1_file.c
index cb571ac6e8ed0657e39b346b41961caa8cc825be,4eb3318ae0c189106f43ac56ae472f8af66cb14f..3066b5f71c1eb2916081967f396c8427e3e75406
@@@ -23,6 -23,7 +23,7 @@@
  #include "bulk-checkin.h"
  #include "streaming.h"
  #include "dir.h"
+ #include "mru.h"
  
  #ifndef O_NOATIME
  #if defined(__linux__) && (defined(__i386__) || defined(__PPC__))
@@@ -59,14 -60,6 +60,6 @@@ static struct cached_object empty_tree 
        0
  };
  
- /*
-  * A pointer to the last packed_git in which an object was found.
-  * When an object is sought, we look in this packfile first, because
-  * objects that are looked up at similar times are often in the same
-  * packfile as one another.
-  */
- static struct packed_git *last_found_pack;
  static struct cached_object *find_cached_object(const unsigned char *sha1)
  {
        int i;
@@@ -522,6 -515,9 +515,9 @@@ static size_t peak_pack_mapped
  static size_t pack_mapped;
  struct packed_git *packed_git;
  
+ static struct mru packed_git_mru_storage;
+ struct mru *packed_git_mru = &packed_git_mru_storage;
  void pack_report(void)
  {
        fprintf(stderr,
@@@ -891,36 -887,6 +887,6 @@@ void close_pack_index(struct packed_gi
        }
  }
  
- /*
-  * This is used by git-repack in case a newly created pack happens to
-  * contain the same set of objects as an existing one.  In that case
-  * the resulting file might be different even if its name would be the
-  * same.  It is best to close any reference to the old pack before it is
-  * replaced on disk.  Of course no index pointers or windows for given pack
-  * must subsist at this point.  If ever objects from this pack are requested
-  * again, the new version of the pack will be reinitialized through
-  * reprepare_packed_git().
-  */
- void free_pack_by_name(const char *pack_name)
- {
-       struct packed_git *p, **pp = &packed_git;
-       while (*pp) {
-               p = *pp;
-               if (strcmp(pack_name, p->pack_name) == 0) {
-                       clear_delta_base_cache();
-                       close_pack(p);
-                       free(p->bad_object_sha1);
-                       *pp = p->next;
-                       if (last_found_pack == p)
-                               last_found_pack = NULL;
-                       free(p);
-                       return;
-               }
-               pp = &p->next;
-       }
- }
  static unsigned int get_max_fd_limit(void)
  {
  #ifdef RLIMIT_NOFILE
@@@ -1385,6 -1351,15 +1351,15 @@@ static void rearrange_packed_git(void
        free(ary);
  }
  
+ static void prepare_packed_git_mru(void)
+ {
+       struct packed_git *p;
+       mru_clear(packed_git_mru);
+       for (p = packed_git; p; p = p->next)
+               mru_append(packed_git_mru, p);
+ }
  static int prepare_packed_git_run_once = 0;
  void prepare_packed_git(void)
  {
                alt->name[-1] = '/';
        }
        rearrange_packed_git();
+       prepare_packed_git_mru();
        prepare_packed_git_run_once = 1;
  }
  
@@@ -2281,7 -2257,7 +2257,7 @@@ void *unpack_entry(struct packed_git *p
  
                if (do_check_packed_object_crc && p->index_version > 1) {
                        struct revindex_entry *revidx = find_pack_revindex(p, obj_offset);
 -                      unsigned long len = revidx[1].offset - obj_offset;
 +                      off_t len = revidx[1].offset - obj_offset;
                        if (check_pack_crc(p, &w_curs, obj_offset, len, revidx->nr)) {
                                const unsigned char *sha1 =
                                        nth_packed_object_sha1(p, revidx->nr);
@@@ -2604,21 -2580,15 +2580,15 @@@ static int fill_pack_entry(const unsign
   */
  static int find_pack_entry(const unsigned char *sha1, struct pack_entry *e)
  {
-       struct packed_git *p;
+       struct mru_entry *p;
  
        prepare_packed_git();
        if (!packed_git)
                return 0;
  
-       if (last_found_pack && fill_pack_entry(sha1, e, last_found_pack))
-               return 1;
-       for (p = packed_git; p; p = p->next) {
-               if (p == last_found_pack)
-                       continue; /* we already checked this one */
-               if (fill_pack_entry(sha1, e, p)) {
-                       last_found_pack = p;
+       for (p = packed_git_mru->head; p; p = p->next) {
+               if (fill_pack_entry(sha1, e, p->item)) {
+                       mru_mark(packed_git_mru, p);
                        return 1;
                }
        }