sha1_file: remove recursion in packed_object_info
authorThomas Rast <trast@student.ethz.ch>
Mon, 25 Mar 2013 18:07:39 +0000 (19:07 +0100)
committerJunio C Hamano <gitster@pobox.com>
Mon, 25 Mar 2013 22:48:18 +0000 (15:48 -0700)
packed_object_info() and packed_delta_info() were mutually recursive.
The former would handle ordinary types and defer deltas to the latter;
the latter would use the former to resolve the delta base.

This arrangement, however, leads to trouble with threaded index-pack
and long delta chains on platforms where thread stacks are small, as
happened on OS X (512kB thread stacks by default) with the chromium
repo.

The task of the two functions is not all that hard to describe without
any recursion, however. It proceeds in three steps:

- determine the representation type and size, based on the outermost
object (delta or not)

- follow through the delta chain, if any

- determine the object type from what is found at the end of the delta
chain

The only complication stems from the error recovery. If parsing fails
at any step, we want to mark that object (within the pack) as bad and
try getting the corresponding SHA1 from elsewhere. If that also
fails, we want to repeat this process back up the delta chain until we
find a reasonable solution or conclude that there is no way to
reconstruct the object. (This is conveniently checked by t5303.)

To achieve that within the pack, we keep track of the entire delta
chain in a stack. When things go sour, we process that stack from the
top, marking entries as bad and attempting to re-resolve by sha1. To
avoid excessive malloc(), the stack starts out with a small
stack-allocated array. The choice of 64 is based on the default of
pack.depth, which is 50, in the hope that it covers "most" delta
chains without any need for malloc().

It's much harder to make the actual re-resolving by sha1 nonrecursive,
so we skip that. If you can't afford *that* recursion, your
corruption problems are more serious than your stack size problems.

Reported-by: Stefan Zager <szager@google.com>
Signed-off-by: Thomas Rast <trast@student.ethz.ch>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
sha1_file.c
index 40b23297b2e1e60a3719e9c67256303e39456604..71877a75fd05a9d11432d50fb86bb60765201529 100644 (file)
@@ -1560,50 +1560,6 @@ static off_t get_delta_base(struct packed_git *p,
        return base_offset;
 }
 
-/* forward declaration for a mutually recursive function */
-static int packed_object_info(struct packed_git *p, off_t offset,
-                             unsigned long *sizep, int *rtype);
-
-static int packed_delta_info(struct packed_git *p,
-                            struct pack_window **w_curs,
-                            off_t curpos,
-                            enum object_type type,
-                            off_t obj_offset,
-                            unsigned long *sizep)
-{
-       off_t base_offset;
-
-       base_offset = get_delta_base(p, w_curs, &curpos, type, obj_offset);
-       if (!base_offset)
-               return OBJ_BAD;
-       type = packed_object_info(p, base_offset, NULL, NULL);
-       if (type <= OBJ_NONE) {
-               struct revindex_entry *revidx;
-               const unsigned char *base_sha1;
-               revidx = find_pack_revindex(p, base_offset);
-               if (!revidx)
-                       return OBJ_BAD;
-               base_sha1 = nth_packed_object_sha1(p, revidx->nr);
-               mark_bad_packed_object(p, base_sha1);
-               type = sha1_object_info(base_sha1, NULL);
-               if (type <= OBJ_NONE)
-                       return OBJ_BAD;
-       }
-
-       /* We choose to only get the type of the base object and
-        * ignore potentially corrupt pack file that expects the delta
-        * based on a base with a wrong size.  This saves tons of
-        * inflate() calls.
-        */
-       if (sizep) {
-               *sizep = get_size_from_delta(p, w_curs, curpos);
-               if (*sizep == 0)
-                       type = OBJ_BAD;
-       }
-
-       return type;
-}
-
 int unpack_object_header(struct packed_git *p,
                         struct pack_window **w_curs,
                         off_t *curpos,
@@ -1630,6 +1586,25 @@ int unpack_object_header(struct packed_git *p,
        return type;
 }
 
+static int retry_bad_packed_offset(struct packed_git *p, off_t obj_offset)
+{
+       int type;
+       struct revindex_entry *revidx;
+       const unsigned char *sha1;
+       revidx = find_pack_revindex(p, obj_offset);
+       if (!revidx)
+               return OBJ_BAD;
+       sha1 = nth_packed_object_sha1(p, revidx->nr);
+       mark_bad_packed_object(p, sha1);
+       type = sha1_object_info(sha1, NULL);
+       if (type <= OBJ_NONE)
+               return OBJ_BAD;
+       return type;
+}
+
+
+#define POI_STACK_PREALLOC 64
+
 static int packed_object_info(struct packed_git *p, off_t obj_offset,
                              unsigned long *sizep, int *rtype)
 {
@@ -1637,31 +1612,89 @@ static int packed_object_info(struct packed_git *p, off_t obj_offset,
        unsigned long size;
        off_t curpos = obj_offset;
        enum object_type type;
+       off_t small_poi_stack[POI_STACK_PREALLOC];
+       off_t *poi_stack = small_poi_stack;
+       int poi_stack_nr = 0, poi_stack_alloc = POI_STACK_PREALLOC;
 
        type = unpack_object_header(p, &w_curs, &curpos, &size);
+
        if (rtype)
                *rtype = type; /* representation type */
 
+       if (sizep) {
+               if (type == OBJ_OFS_DELTA || type == OBJ_REF_DELTA) {
+                       off_t tmp_pos = curpos;
+                       off_t base_offset = get_delta_base(p, &w_curs, &tmp_pos,
+                                                          type, obj_offset);
+                       if (!base_offset) {
+                               type = OBJ_BAD;
+                               goto out;
+                       }
+                       *sizep = get_size_from_delta(p, &w_curs, tmp_pos);
+                       if (*sizep == 0) {
+                               type = OBJ_BAD;
+                               goto out;
+                       }
+               } else {
+                       *sizep = size;
+               }
+       }
+
+       while (type == OBJ_OFS_DELTA || type == OBJ_REF_DELTA) {
+               off_t base_offset;
+               /* Push the object we're going to leave behind */
+               if (poi_stack_nr >= poi_stack_alloc && poi_stack == small_poi_stack) {
+                       poi_stack_alloc = alloc_nr(poi_stack_nr);
+                       poi_stack = xmalloc(sizeof(off_t)*poi_stack_alloc);
+                       memcpy(poi_stack, small_poi_stack, sizeof(off_t)*poi_stack_nr);
+               } else {
+                       ALLOC_GROW(poi_stack, poi_stack_nr+1, poi_stack_alloc);
+               }
+               poi_stack[poi_stack_nr++] = obj_offset;
+               /* If parsing the base offset fails, just unwind */
+               base_offset = get_delta_base(p, &w_curs, &curpos, type, obj_offset);
+               if (!base_offset)
+                       goto unwind;
+               curpos = obj_offset = base_offset;
+               type = unpack_object_header(p, &w_curs, &curpos, &size);
+               if (type <= OBJ_NONE) {
+                       /* If getting the base itself fails, we first
+                        * retry the base, otherwise unwind */
+                       type = retry_bad_packed_offset(p, base_offset);
+                       if (type > OBJ_NONE)
+                               goto out;
+                       goto unwind;
+               }
+       }
+
        switch (type) {
-       case OBJ_OFS_DELTA:
-       case OBJ_REF_DELTA:
-               type = packed_delta_info(p, &w_curs, curpos,
-                                        type, obj_offset, sizep);
-               break;
+       case OBJ_BAD:
        case OBJ_COMMIT:
        case OBJ_TREE:
        case OBJ_BLOB:
        case OBJ_TAG:
-               if (sizep)
-                       *sizep = size;
                break;
        default:
                error("unknown object type %i at offset %"PRIuMAX" in %s",
                      type, (uintmax_t)obj_offset, p->pack_name);
                type = OBJ_BAD;
        }
+
+out:
+       if (poi_stack != small_poi_stack)
+               free(poi_stack);
        unuse_pack(&w_curs);
        return type;
+
+unwind:
+       while (poi_stack_nr) {
+               obj_offset = poi_stack[--poi_stack_nr];
+               type = retry_bad_packed_offset(p, obj_offset);
+               if (type > OBJ_NONE)
+                       goto out;
+       }
+       type = OBJ_BAD;
+       goto out;
 }
 
 static void *unpack_compressed_entry(struct packed_git *p,