Merge branch 'jk/fast-import-cleanup'
author Junio C Hamano <gitster@pobox.com>
Tue, 28 Mar 2017 21:05:59 +0000 (14:05 -0700)
committer Junio C Hamano <gitster@pobox.com>
Tue, 28 Mar 2017 21:05:59 +0000 (14:05 -0700)
Code clean-up.

* jk/fast-import-cleanup:
pack.h: define largest possible encoded object size
encode_in_pack_object_header: respect output buffer length
fast-import: use xsnprintf for formatting headers
fast-import: use xsnprintf for writing sha1s
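
The cleanup centers on bounds checking: pack.h gains a MAX_PACK_OBJECT_HEADER constant for sizing pack header buffers, encode_in_pack_object_header() now receives the output buffer and its length so it can die on overflow instead of writing past the end, and fast-import's plain snprintf() calls become xsnprintf(), git's die-on-truncation wrapper, which is why the explicit "impossibly large object header" checks disappear below. The pack.h hunk itself is not part of this combined diff, so the following sketch is reconstructed from the call sites; the exact constant value and return type are assumptions:

    /* pack.h (sketch reconstructed from the callers, not the verbatim hunk) */
    /* One type/size byte plus up to nine continuation bytes is enough for
     * any size a pack entry can declare. */
    #define MAX_PACK_OBJECT_HEADER 10

    /*
     * Write a pack object header for "type"/"size" into hdr, which holds
     * hdr_len bytes. Returns the number of bytes written; dies if hdr_len
     * is too small rather than overrunning the buffer.
     */
    int encode_in_pack_object_header(unsigned char *hdr, int hdr_len,
                                     enum object_type type, uintmax_t size);

Callers accordingly declare unsigned char header[MAX_PACK_OBJECT_HEADER] and pass sizeof(header) along with the buffer, as in encode_in_pack_object_header(header, sizeof(header), type, size).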

builtin/pack-objects.c
fast-import.c
diff --combined builtin/pack-objects.c
index 16517f26375b1a0ebe58b628b7741c7f5e77bc08,28e7498aa186aa15e26c707b8499b4cfa678dde1..84af7c2324bad29dd6522321328921d3afde7d57
@@@ -239,7 -239,8 +239,8 @@@ static unsigned long write_no_reuse_obj
                                           unsigned long limit, int usable_delta)
  {
        unsigned long size, datalen;
-       unsigned char header[10], dheader[10];
+       unsigned char header[MAX_PACK_OBJECT_HEADER],
+                     dheader[MAX_PACK_OBJECT_HEADER];
        unsigned hdrlen;
        enum object_type type;
        void *buf;
         * The object header is a byte of 'type' followed by zero or
         * more bytes of length.
         */
-       hdrlen = encode_in_pack_object_header(type, size, header);
+       hdrlen = encode_in_pack_object_header(header, sizeof(header),
+                                             type, size);
  
        if (type == OBJ_OFS_DELTA) {
                /*
@@@ -352,13 -354,15 +354,15 @@@ static off_t write_reuse_object(struct 
        off_t offset;
        enum object_type type = entry->type;
        off_t datalen;
-       unsigned char header[10], dheader[10];
+       unsigned char header[MAX_PACK_OBJECT_HEADER],
+                     dheader[MAX_PACK_OBJECT_HEADER];
        unsigned hdrlen;
  
        if (entry->delta)
                type = (allow_ofs_delta && entry->delta->idx.offset) ?
                        OBJ_OFS_DELTA : OBJ_REF_DELTA;
-       hdrlen = encode_in_pack_object_header(type, entry->size, header);
+       hdrlen = encode_in_pack_object_header(header, sizeof(header),
+                                             type, entry->size);
  
        offset = entry->in_pack_offset;
        revidx = find_pack_revindex(p, offset);
@@@ -894,15 -898,24 +898,15 @@@ static void write_pack_file(void
                        written, nr_result);
  }
  
 -static void setup_delta_attr_check(struct git_attr_check *check)
 -{
 -      static struct git_attr *attr_delta;
 -
 -      if (!attr_delta)
 -              attr_delta = git_attr("delta");
 -
 -      check[0].attr = attr_delta;
 -}
 -
  static int no_try_delta(const char *path)
  {
 -      struct git_attr_check check[1];
 +      static struct attr_check *check;
  
 -      setup_delta_attr_check(check);
 -      if (git_check_attr(path, ARRAY_SIZE(check), check))
 +      if (!check)
 +              check = attr_check_initl("delta", NULL);
 +      if (git_check_attr(path, check))
                return 0;
 -      if (ATTR_FALSE(check->value))
 +      if (ATTR_FALSE(check->items[0].value))
                return 1;
        return 0;
  }
@@@ -1530,8 -1543,6 +1534,8 @@@ static int pack_offset_sort(const void 
   *   2. Updating our size/type to the non-delta representation. These were
   *      either not recorded initially (size) or overwritten with the delta type
   *      (type) when check_object() decided to reuse the delta.
 + *
 + *   3. Resetting our delta depth, as we are now a base object.
   */
  static void drop_reused_delta(struct object_entry *entry)
  {
                        p = &(*p)->delta_sibling;
        }
        entry->delta = NULL;
 +      entry->depth = 0;
  
        oi.sizep = &entry->size;
        oi.typep = &entry->type;
   * Follow the chain of deltas from this entry onward, throwing away any links
   * that cause us to hit a cycle (as determined by the DFS state flags in
   * the entries).
 + *
 + * We also detect too-long reused chains that would violate our --depth
 + * limit.
   */
  static void break_delta_chains(struct object_entry *entry)
  {
 -      /* If it's not a delta, it can't be part of a cycle. */
 -      if (!entry->delta) {
 -              entry->dfs_state = DFS_DONE;
 -              return;
 -      }
 +      /*
 +       * The actual depth of each object we will write is stored as an int,
 +       * as it cannot exceed our int "depth" limit. But before we break
 +       * chains based on that limit, we may potentially go as deep as the
 +       * number of objects, which is elsewhere bounded to a uint32_t.
 +       */
 +      uint32_t total_depth;
 +      struct object_entry *cur, *next;
 +
 +      for (cur = entry, total_depth = 0;
 +           cur;
 +           cur = cur->delta, total_depth++) {
 +              if (cur->dfs_state == DFS_DONE) {
 +                      /*
 +                       * We've already seen this object and know it isn't
 +                       * part of a cycle. We do need to append its depth
 +                       * to our count.
 +                       */
 +                      total_depth += cur->depth;
 +                      break;
 +              }
 +
 +              /*
 +               * We break cycles before looping, so an ACTIVE state (or any
 +               * other cruft which made its way into the state variable)
 +               * is a bug.
 +               */
 +              if (cur->dfs_state != DFS_NONE)
 +                      die("BUG: confusing delta dfs state in first pass: %d",
 +                          cur->dfs_state);
  
 -      switch (entry->dfs_state) {
 -      case DFS_NONE:
                /*
 -               * This is the first time we've seen the object. We mark it as
 -               * part of the active potential cycle and recurse.
 +               * Now we know this is the first time we've seen the object. If
 +               * it's not a delta, we're done traversing, but we'll mark it
 +               * done to save time on future traversals.
                 */
 -              entry->dfs_state = DFS_ACTIVE;
 -              break_delta_chains(entry->delta);
 -              entry->dfs_state = DFS_DONE;
 -              break;
 +              if (!cur->delta) {
 +                      cur->dfs_state = DFS_DONE;
 +                      break;
 +              }
  
 -      case DFS_DONE:
 -              /* object already examined, and not part of a cycle */
 -              break;
 +              /*
 +               * Mark ourselves as active and see if the next step causes
 +               * us to cycle to another active object. It's important to do
 +               * this _before_ we loop, because it impacts where we make the
 +               * cut, and thus how our total_depth counter works.
 +               * E.g., We may see a partial loop like:
 +               *
 +               *   A -> B -> C -> D -> B
 +               *
 +               * Cutting B->C breaks the cycle. But now the depth of A is
 +               * only 1, and our total_depth counter is at 3. The size of the
 +               * error is always one less than the size of the cycle we
 +               * broke. Commits C and D were "lost" from A's chain.
 +               *
 +               * If we instead cut D->B, then the depth of A is correct at 3.
 +               * We keep all commits in the chain that we examined.
 +               */
 +              cur->dfs_state = DFS_ACTIVE;
 +              if (cur->delta->dfs_state == DFS_ACTIVE) {
 +                      drop_reused_delta(cur);
 +                      cur->dfs_state = DFS_DONE;
 +                      break;
 +              }
 +      }
 +
 +      /*
 +       * And now that we've gone all the way to the bottom of the chain, we
 +       * need to clear the active flags and set the depth fields as
 +       * appropriate. Unlike the loop above, which can quit when it drops a
 +       * delta, we need to keep going to look for more depth cuts. So we need
 +       * an extra "next" pointer to keep going after we reset cur->delta.
 +       */
 +      for (cur = entry; cur; cur = next) {
 +              next = cur->delta;
 +
 +              /*
 +               * We should have a chain of zero or more ACTIVE states down to
 +               * a final DONE. We can quit after the DONE, because either it
 +               * has no bases, or we've already handled them in a previous
 +               * call.
 +               */
 +              if (cur->dfs_state == DFS_DONE)
 +                      break;
 +              else if (cur->dfs_state != DFS_ACTIVE)
 +                      die("BUG: confusing delta dfs state in second pass: %d",
 +                          cur->dfs_state);
  
 -      case DFS_ACTIVE:
                /*
 -               * We found a cycle that needs broken. It would be correct to
 -               * break any link in the chain, but it's convenient to
 -               * break this one.
 +               * If the total_depth is more than depth, then we need to snip
 +               * the chain into two or more smaller chains that don't exceed
 +               * the maximum depth. Most of the resulting chains will contain
 +               * (depth + 1) entries (i.e., depth deltas plus one base), and
 +               * the last chain (i.e., the one containing entry) will contain
 +               * whatever entries are left over, namely
 +               * (total_depth % (depth + 1)) of them.
 +               *
 +               * Since we are iterating towards decreasing depth, we need to
 +               * decrement total_depth as we go, and we need to write to the
 +               * entry what its final depth will be after all of the
 +               * snipping. Since we're snipping into chains of length (depth
 +               * + 1) entries, the final depth of an entry will be its
 +               * original depth modulo (depth + 1). Any time we encounter an
 +               * entry whose final depth is supposed to be zero, we snip it
 +               * from its delta base, thereby making it so.
                 */
 -              drop_reused_delta(entry);
 -              entry->dfs_state = DFS_DONE;
 -              break;
 +              cur->depth = (total_depth--) % (depth + 1);
 +              if (!cur->depth)
 +                      drop_reused_delta(cur);
 +
 +              cur->dfs_state = DFS_DONE;
        }
  }
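
A worked example of the snipping arithmetic in the comment above (illustration only): with --depth=2, so depth + 1 = 3, and a reused chain A -> B -> C -> D -> E -> F -> G whose tip A starts at total_depth = 6, the second pass assigns cur->depth = total_depth-- % (depth + 1) while walking from A toward the base G:

    A: 6 % 3 = 0  -> drop_reused_delta(A); A becomes a base
    B: 5 % 3 = 2
    C: 4 % 3 = 1
    D: 3 % 3 = 0  -> drop_reused_delta(D); D becomes a base
    E: 2 % 3 = 2
    F: 1 % 3 = 1
    G: reached in DFS_DONE state; already a non-delta base, so the loop stops

The single reused chain is snipped into {A}, {B -> C -> D} and {E -> F -> G}, each within the --depth limit.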
  
@@@ -2612,17 -2538,17 +2616,17 @@@ static void add_objects_in_unpacked_pac
        free(in_pack.array);
  }
  
 -static int add_loose_object(const unsigned char *sha1, const char *path,
 +static int add_loose_object(const struct object_id *oid, const char *path,
                            void *data)
  {
 -      enum object_type type = sha1_object_info(sha1, NULL);
 +      enum object_type type = sha1_object_info(oid->hash, NULL);
  
        if (type < 0) {
                warning("loose object at %s could not be examined", path);
                return 0;
        }
  
 -      add_object_entry(sha1, type, "", 0);
 +      add_object_entry(oid->hash, type, "", 0);
        return 0;
  }
  
diff --combined fast-import.c
index 41a539f97d94fdd45787c520ceec4b5c2b6102d4,c4ccb3c9cd1eb8af4e31ece5f3e2765565dc2dfb..f6f416f20a9f4c05f90690fcd7bfe049a8c91467
@@@ -940,40 -940,41 +940,40 @@@ static const char *create_index(void
  
  static char *keep_pack(const char *curr_index_name)
  {
 -      static char name[PATH_MAX];
        static const char *keep_msg = "fast-import";
 +      struct strbuf name = STRBUF_INIT;
        int keep_fd;
  
 -      keep_fd = odb_pack_keep(name, sizeof(name), pack_data->sha1);
 +      odb_pack_name(&name, pack_data->sha1, "keep");
 +      keep_fd = odb_pack_keep(name.buf);
        if (keep_fd < 0)
                die_errno("cannot create keep file");
        write_or_die(keep_fd, keep_msg, strlen(keep_msg));
        if (close(keep_fd))
                die_errno("failed to write keep file");
  
 -      snprintf(name, sizeof(name), "%s/pack/pack-%s.pack",
 -               get_object_directory(), sha1_to_hex(pack_data->sha1));
 -      if (finalize_object_file(pack_data->pack_name, name))
 +      odb_pack_name(&name, pack_data->sha1, "pack");
 +      if (finalize_object_file(pack_data->pack_name, name.buf))
                die("cannot store pack file");
  
 -      snprintf(name, sizeof(name), "%s/pack/pack-%s.idx",
 -               get_object_directory(), sha1_to_hex(pack_data->sha1));
 -      if (finalize_object_file(curr_index_name, name))
 +      odb_pack_name(&name, pack_data->sha1, "idx");
 +      if (finalize_object_file(curr_index_name, name.buf))
                die("cannot store index file");
        free((void *)curr_index_name);
 -      return name;
 +      return strbuf_detach(&name, NULL);
  }
  
  static void unkeep_all_packs(void)
  {
 -      static char name[PATH_MAX];
 +      struct strbuf name = STRBUF_INIT;
        int k;
  
        for (k = 0; k < pack_id; k++) {
                struct packed_git *p = all_packs[k];
 -              snprintf(name, sizeof(name), "%s/pack/pack-%s.keep",
 -                       get_object_directory(), sha1_to_hex(p->sha1));
 -              unlink_or_warn(name);
 +              odb_pack_name(&name, p->sha1, "keep");
 +              unlink_or_warn(name.buf);
        }
 +      strbuf_release(&name);
  }
  
  static int loosen_small_pack(const struct packed_git *p)
@@@ -1032,7 -1033,6 +1032,7 @@@ static void end_packfile(void
                        die("core git rejected index %s", idx_name);
                all_packs[pack_id] = new_p;
                install_packed_git(new_p);
 +              free(idx_name);
  
                /* Print the boundary */
                if (pack_edges) {
@@@ -1173,7 -1173,8 +1173,8 @@@ static int store_object
                delta_count_by_type[type]++;
                e->depth = last->depth + 1;
  
-               hdrlen = encode_in_pack_object_header(OBJ_OFS_DELTA, deltalen, hdr);
+               hdrlen = encode_in_pack_object_header(hdr, sizeof(hdr),
+                                                     OBJ_OFS_DELTA, deltalen);
                sha1write(pack_file, hdr, hdrlen);
                pack_size += hdrlen;
  
                pack_size += sizeof(hdr) - pos;
        } else {
                e->depth = 0;
-               hdrlen = encode_in_pack_object_header(type, dat->len, hdr);
+               hdrlen = encode_in_pack_object_header(hdr, sizeof(hdr),
+                                                     type, dat->len);
                sha1write(pack_file, hdr, hdrlen);
                pack_size += hdrlen;
        }
@@@ -1237,9 -1239,7 +1239,7 @@@ static void stream_blob(uintmax_t len, 
        sha1file_checkpoint(pack_file, &checkpoint);
        offset = checkpoint.offset;
  
-       hdrlen = snprintf((char *)out_buf, out_sz, "blob %" PRIuMAX, len) + 1;
-       if (out_sz <= hdrlen)
-               die("impossibly large object header");
+       hdrlen = xsnprintf((char *)out_buf, out_sz, "blob %" PRIuMAX, len) + 1;
  
        git_SHA1_Init(&c);
        git_SHA1_Update(&c, out_buf, hdrlen);
  
        git_deflate_init(&s, pack_compression_level);
  
-       hdrlen = encode_in_pack_object_header(OBJ_BLOB, len, out_buf);
-       if (out_sz <= hdrlen)
-               die("impossibly large object header");
+       hdrlen = encode_in_pack_object_header(out_buf, out_sz, OBJ_BLOB, len);
  
        s.next_out = out_buf + hdrlen;
        s.avail_out = out_sz - hdrlen;
@@@ -1752,7 -1750,7 +1750,7 @@@ static int update_branch(struct branch 
  
        if (is_null_sha1(b->sha1)) {
                if (b->delete)
 -                      delete_ref(b->name, NULL, 0);
 +                      delete_ref(NULL, b->name, NULL, 0);
                return 0;
        }
        if (read_ref(b->name, old_sha1))
@@@ -3003,7 -3001,7 +3001,7 @@@ static void parse_get_mark(const char *
        if (!oe)
                die("Unknown mark: %s", command_buf.buf);
  
-       snprintf(output, sizeof(output), "%s\n", sha1_to_hex(oe->idx.sha1));
+       xsnprintf(output, sizeof(output), "%s\n", sha1_to_hex(oe->idx.sha1));
        cat_blob_write(output, 41);
  }