Merge branch 'jk/disable-pack-reuse-when-broken' into maint
authorJunio C Hamano <gitster@pobox.com>
Sun, 4 Jun 2017 01:21:02 +0000 (10:21 +0900)
committerJunio C Hamano <gitster@pobox.com>
Sun, 4 Jun 2017 01:21:02 +0000 (10:21 +0900)
"pack-objects" can stream a slice of an existing packfile out when
the pack bitmap can tell that the reachable objects are all needed
in the output, without inspecting individual objects. This
strategy however would not work well when "--local" and other
options are in use, and need to be disabled.

* jk/disable-pack-reuse-when-broken:
t5310: fix "; do" style
pack-objects: disable pack reuse for object-selection options

1  2 
builtin/pack-objects.c
diff --combined builtin/pack-objects.c
index 0fe35d1b5aebd74116d66d5414bdabf19baf7d97,9cd13dfdbe914f68e93cb5af65b7b97f268809f7..50e01aa80ec7f56b32d47e841e1548c174501bc2
@@@ -239,8 -239,7 +239,8 @@@ static unsigned long write_no_reuse_obj
                                           unsigned long limit, int usable_delta)
  {
        unsigned long size, datalen;
 -      unsigned char header[10], dheader[10];
 +      unsigned char header[MAX_PACK_OBJECT_HEADER],
 +                    dheader[MAX_PACK_OBJECT_HEADER];
        unsigned hdrlen;
        enum object_type type;
        void *buf;
         * The object header is a byte of 'type' followed by zero or
         * more bytes of length.
         */
 -      hdrlen = encode_in_pack_object_header(type, size, header);
 +      hdrlen = encode_in_pack_object_header(header, sizeof(header),
 +                                            type, size);
  
        if (type == OBJ_OFS_DELTA) {
                /*
@@@ -354,15 -352,13 +354,15 @@@ static off_t write_reuse_object(struct 
        off_t offset;
        enum object_type type = entry->type;
        off_t datalen;
 -      unsigned char header[10], dheader[10];
 +      unsigned char header[MAX_PACK_OBJECT_HEADER],
 +                    dheader[MAX_PACK_OBJECT_HEADER];
        unsigned hdrlen;
  
        if (entry->delta)
                type = (allow_ofs_delta && entry->delta->idx.offset) ?
                        OBJ_OFS_DELTA : OBJ_REF_DELTA;
 -      hdrlen = encode_in_pack_object_header(type, entry->size, header);
 +      hdrlen = encode_in_pack_object_header(header, sizeof(header),
 +                                            type, entry->size);
  
        offset = entry->in_pack_offset;
        revidx = find_pack_revindex(p, offset);
@@@ -898,15 -894,24 +898,15 @@@ static void write_pack_file(void
                        written, nr_result);
  }
  
 -static void setup_delta_attr_check(struct git_attr_check *check)
 -{
 -      static struct git_attr *attr_delta;
 -
 -      if (!attr_delta)
 -              attr_delta = git_attr("delta");
 -
 -      check[0].attr = attr_delta;
 -}
 -
  static int no_try_delta(const char *path)
  {
 -      struct git_attr_check check[1];
 +      static struct attr_check *check;
  
 -      setup_delta_attr_check(check);
 -      if (git_check_attr(path, ARRAY_SIZE(check), check))
 +      if (!check)
 +              check = attr_check_initl("delta", NULL);
 +      if (git_check_attr(path, check))
                return 0;
 -      if (ATTR_FALSE(check->value))
 +      if (ATTR_FALSE(check->items[0].value))
                return 1;
        return 0;
  }
@@@ -1534,8 -1539,6 +1534,8 @@@ static int pack_offset_sort(const void 
   *   2. Updating our size/type to the non-delta representation. These were
   *      either not recorded initially (size) or overwritten with the delta type
   *      (type) when check_object() decided to reuse the delta.
 + *
 + *   3. Resetting our delta depth, as we are now a base object.
   */
  static void drop_reused_delta(struct object_entry *entry)
  {
                        p = &(*p)->delta_sibling;
        }
        entry->delta = NULL;
 +      entry->depth = 0;
  
        oi.sizep = &entry->size;
        oi.typep = &entry->type;
   * Follow the chain of deltas from this entry onward, throwing away any links
   * that cause us to hit a cycle (as determined by the DFS state flags in
   * the entries).
 + *
 + * We also detect too-long reused chains that would violate our --depth
 + * limit.
   */
  static void break_delta_chains(struct object_entry *entry)
  {
 -      /* If it's not a delta, it can't be part of a cycle. */
 -      if (!entry->delta) {
 -              entry->dfs_state = DFS_DONE;
 -              return;
 -      }
 +      /*
 +       * The actual depth of each object we will write is stored as an int,
 +       * as it cannot exceed our int "depth" limit. But before we break
 +       * changes based no that limit, we may potentially go as deep as the
 +       * number of objects, which is elsewhere bounded to a uint32_t.
 +       */
 +      uint32_t total_depth;
 +      struct object_entry *cur, *next;
 +
 +      for (cur = entry, total_depth = 0;
 +           cur;
 +           cur = cur->delta, total_depth++) {
 +              if (cur->dfs_state == DFS_DONE) {
 +                      /*
 +                       * We've already seen this object and know it isn't
 +                       * part of a cycle. We do need to append its depth
 +                       * to our count.
 +                       */
 +                      total_depth += cur->depth;
 +                      break;
 +              }
  
 -      switch (entry->dfs_state) {
 -      case DFS_NONE:
                /*
 -               * This is the first time we've seen the object. We mark it as
 -               * part of the active potential cycle and recurse.
 +               * We break cycles before looping, so an ACTIVE state (or any
 +               * other cruft which made its way into the state variable)
 +               * is a bug.
                 */
 -              entry->dfs_state = DFS_ACTIVE;
 -              break_delta_chains(entry->delta);
 -              entry->dfs_state = DFS_DONE;
 -              break;
 +              if (cur->dfs_state != DFS_NONE)
 +                      die("BUG: confusing delta dfs state in first pass: %d",
 +                          cur->dfs_state);
  
 -      case DFS_DONE:
 -              /* object already examined, and not part of a cycle */
 -              break;
 +              /*
 +               * Now we know this is the first time we've seen the object. If
 +               * it's not a delta, we're done traversing, but we'll mark it
 +               * done to save time on future traversals.
 +               */
 +              if (!cur->delta) {
 +                      cur->dfs_state = DFS_DONE;
 +                      break;
 +              }
  
 -      case DFS_ACTIVE:
                /*
 -               * We found a cycle that needs broken. It would be correct to
 -               * break any link in the chain, but it's convenient to
 -               * break this one.
 +               * Mark ourselves as active and see if the next step causes
 +               * us to cycle to another active object. It's important to do
 +               * this _before_ we loop, because it impacts where we make the
 +               * cut, and thus how our total_depth counter works.
 +               * E.g., We may see a partial loop like:
 +               *
 +               *   A -> B -> C -> D -> B
 +               *
 +               * Cutting B->C breaks the cycle. But now the depth of A is
 +               * only 1, and our total_depth counter is at 3. The size of the
 +               * error is always one less than the size of the cycle we
 +               * broke. Commits C and D were "lost" from A's chain.
 +               *
 +               * If we instead cut D->B, then the depth of A is correct at 3.
 +               * We keep all commits in the chain that we examined.
                 */
 -              drop_reused_delta(entry);
 -              entry->dfs_state = DFS_DONE;
 -              break;
 +              cur->dfs_state = DFS_ACTIVE;
 +              if (cur->delta->dfs_state == DFS_ACTIVE) {
 +                      drop_reused_delta(cur);
 +                      cur->dfs_state = DFS_DONE;
 +                      break;
 +              }
 +      }
 +
 +      /*
 +       * And now that we've gone all the way to the bottom of the chain, we
 +       * need to clear the active flags and set the depth fields as
 +       * appropriate. Unlike the loop above, which can quit when it drops a
 +       * delta, we need to keep going to look for more depth cuts. So we need
 +       * an extra "next" pointer to keep going after we reset cur->delta.
 +       */
 +      for (cur = entry; cur; cur = next) {
 +              next = cur->delta;
 +
 +              /*
 +               * We should have a chain of zero or more ACTIVE states down to
 +               * a final DONE. We can quit after the DONE, because either it
 +               * has no bases, or we've already handled them in a previous
 +               * call.
 +               */
 +              if (cur->dfs_state == DFS_DONE)
 +                      break;
 +              else if (cur->dfs_state != DFS_ACTIVE)
 +                      die("BUG: confusing delta dfs state in second pass: %d",
 +                          cur->dfs_state);
 +
 +              /*
 +               * If the total_depth is more than depth, then we need to snip
 +               * the chain into two or more smaller chains that don't exceed
 +               * the maximum depth. Most of the resulting chains will contain
 +               * (depth + 1) entries (i.e., depth deltas plus one base), and
 +               * the last chain (i.e., the one containing entry) will contain
 +               * whatever entries are left over, namely
 +               * (total_depth % (depth + 1)) of them.
 +               *
 +               * Since we are iterating towards decreasing depth, we need to
 +               * decrement total_depth as we go, and we need to write to the
 +               * entry what its final depth will be after all of the
 +               * snipping. Since we're snipping into chains of length (depth
 +               * + 1) entries, the final depth of an entry will be its
 +               * original depth modulo (depth + 1). Any time we encounter an
 +               * entry whose final depth is supposed to be zero, we snip it
 +               * from its delta base, thereby making it so.
 +               */
 +              cur->depth = (total_depth--) % (depth + 1);
 +              if (!cur->depth)
 +                      drop_reused_delta(cur);
 +
 +              cur->dfs_state = DFS_DONE;
        }
  }
  
@@@ -2616,17 -2534,17 +2616,17 @@@ static void add_objects_in_unpacked_pac
        free(in_pack.array);
  }
  
 -static int add_loose_object(const unsigned char *sha1, const char *path,
 +static int add_loose_object(const struct object_id *oid, const char *path,
                            void *data)
  {
 -      enum object_type type = sha1_object_info(sha1, NULL);
 +      enum object_type type = sha1_object_info(oid->hash, NULL);
  
        if (type < 0) {
                warning("loose object at %s could not be examined", path);
                return 0;
        }
  
 -      add_object_entry(sha1, type, "", 0);
 +      add_object_entry(oid->hash, type, "", 0);
        return 0;
  }
  
@@@ -2672,16 -2590,16 +2672,16 @@@ static int has_sha1_pack_kept_or_nonloc
   *
   * This is filled by get_object_list.
   */
 -static struct sha1_array recent_objects;
 +static struct oid_array recent_objects;
  
 -static int loosened_object_can_be_discarded(const unsigned char *sha1,
 +static int loosened_object_can_be_discarded(const struct object_id *oid,
                                            unsigned long mtime)
  {
        if (!unpack_unreachable_expiration)
                return 0;
        if (mtime > unpack_unreachable_expiration)
                return 0;
 -      if (sha1_array_lookup(&recent_objects, sha1) >= 0)
 +      if (oid_array_lookup(&recent_objects, oid) >= 0)
                return 0;
        return 1;
  }
@@@ -2690,7 -2608,7 +2690,7 @@@ static void loosen_unused_packed_object
  {
        struct packed_git *p;
        uint32_t i;
 -      const unsigned char *sha1;
 +      struct object_id oid;
  
        for (p = packed_git; p; p = p->next) {
                if (!p->pack_local || p->pack_keep)
                        die("cannot open pack index");
  
                for (i = 0; i < p->num_objects; i++) {
 -                      sha1 = nth_packed_object_sha1(p, i);
 -                      if (!packlist_find(&to_pack, sha1, NULL) &&
 -                          !has_sha1_pack_kept_or_nonlocal(sha1) &&
 -                          !loosened_object_can_be_discarded(sha1, p->mtime))
 -                              if (force_object_loose(sha1, p->mtime))
 +                      nth_packed_object_oid(&oid, p, i);
 +                      if (!packlist_find(&to_pack, oid.hash, NULL) &&
 +                          !has_sha1_pack_kept_or_nonlocal(oid.hash) &&
 +                          !loosened_object_can_be_discarded(&oid, p->mtime))
 +                              if (force_object_loose(oid.hash, p->mtime))
                                        die("unable to force loose object");
                }
        }
   */
  static int pack_options_allow_reuse(void)
  {
-       return pack_to_stdout && allow_ofs_delta;
+       return pack_to_stdout &&
+              allow_ofs_delta &&
+              !ignore_packed_keep &&
+              (!local || !have_non_local_packs) &&
+              !incremental;
  }
  
  static int get_object_list_from_bitmap(struct rev_info *revs)
@@@ -2743,12 -2665,12 +2747,12 @@@ static void record_recent_object(struc
                                 const char *name,
                                 void *data)
  {
 -      sha1_array_append(&recent_objects, obj->oid.hash);
 +      oid_array_append(&recent_objects, &obj->oid);
  }
  
  static void record_recent_commit(struct commit *commit, void *data)
  {
 -      sha1_array_append(&recent_objects, commit->object.oid.hash);
 +      oid_array_append(&recent_objects, &commit->object.oid);
  }
  
  static void get_object_list(int ac, const char **av)
        if (unpack_unreachable)
                loosen_unused_packed_objects(&revs);
  
 -      sha1_array_clear(&recent_objects);
 +      oid_array_clear(&recent_objects);
  }
  
  static int option_parse_index_version(const struct option *opt,