Merge branch 'js/commit-graph-chunk-table-fix'
authorJunio C Hamano <gitster@pobox.com>
Tue, 5 Feb 2019 22:26:11 +0000 (14:26 -0800)
committerJunio C Hamano <gitster@pobox.com>
Tue, 5 Feb 2019 22:26:11 +0000 (14:26 -0800)
The codepath to read from the commit-graph file attempted to read
past the end of it when the file's table-of-contents was corrupt.

* js/commit-graph-chunk-table-fix:
Makefile: correct example fuzz build
commit-graph: fix buffer read-overflow
commit-graph, fuzz: add fuzzer for commit-graph

1  2 
Makefile
commit-graph.c
diff --combined Makefile
index 6e8d017e8ed04da7a0319400cc7a3f4f9a74e711,bbcfc2bc9fb27f8424f4647dc09bf10412530bb0..28ee1799d1c78ecc114fd5d6b4fbce32aaec09fa
+++ b/Makefile
@@@ -186,12 -186,6 +186,12 @@@ all:
  # in one call to the platform's SHA1_Update(). e.g. APPLE_COMMON_CRYPTO
  # wants 'SHA1_MAX_BLOCK_SIZE=1024L*1024L*1024L' defined.
  #
 +# Define BLK_SHA256 to use the built-in SHA-256 routines.
 +#
 +# Define GCRYPT_SHA256 to use the SHA-256 routines in libgcrypt.
 +#
 +# Define OPENSSL_SHA256 to use the SHA-256 routines in OpenSSL.
 +#
  # Define NEEDS_CRYPTO_WITH_SSL if you need -lcrypto when using -lssl (Darwin).
  #
  # Define NEEDS_SSL_WITH_CRYPTO if you need -lssl when using -lcrypto (Darwin).
@@@ -690,6 -684,7 +690,7 @@@ SCRIPTS = $(SCRIPT_SH_INS) 
  
  ETAGS_TARGET = TAGS
  
+ FUZZ_OBJS += fuzz-commit-graph.o
  FUZZ_OBJS += fuzz-pack-headers.o
  FUZZ_OBJS += fuzz-pack-idx.o
  
@@@ -730,9 -725,7 +731,9 @@@ TEST_BUILTINS_OBJS += test-dump-split-i
  TEST_BUILTINS_OBJS += test-dump-untracked-cache.o
  TEST_BUILTINS_OBJS += test-example-decorate.o
  TEST_BUILTINS_OBJS += test-genrandom.o
 +TEST_BUILTINS_OBJS += test-hash.o
  TEST_BUILTINS_OBJS += test-hashmap.o
 +TEST_BUILTINS_OBJS += test-hash-speed.o
  TEST_BUILTINS_OBJS += test-index-version.o
  TEST_BUILTINS_OBJS += test-json-writer.o
  TEST_BUILTINS_OBJS += test-lazy-init-name-hash.o
@@@ -755,7 -748,6 +756,7 @@@ TEST_BUILTINS_OBJS += test-run-command.
  TEST_BUILTINS_OBJS += test-scrap-cache-tree.o
  TEST_BUILTINS_OBJS += test-sha1.o
  TEST_BUILTINS_OBJS += test-sha1-array.o
 +TEST_BUILTINS_OBJS += test-sha256.o
  TEST_BUILTINS_OBJS += test-sigchain.o
  TEST_BUILTINS_OBJS += test-strcmp-offset.o
  TEST_BUILTINS_OBJS += test-string-list.o
@@@ -1655,19 -1647,6 +1656,19 @@@ endi
  endif
  endif
  
 +ifdef OPENSSL_SHA256
 +      EXTLIBS += $(LIB_4_CRYPTO)
 +      BASIC_CFLAGS += -DSHA256_OPENSSL
 +else
 +ifdef GCRYPT_SHA256
 +      BASIC_CFLAGS += -DSHA256_GCRYPT
 +      EXTLIBS += -lgcrypt
 +else
 +      LIB_OBJS += sha256/block/sha256.o
 +      BASIC_CFLAGS += -DSHA256_BLK
 +endif
 +endif
 +
  ifdef SHA1_MAX_BLOCK_SIZE
        LIB_OBJS += compat/sha1-chunked.o
        BASIC_CFLAGS += -DSHA1_MAX_BLOCK_SIZE="$(SHA1_MAX_BLOCK_SIZE)"
@@@ -3125,7 -3104,7 +3126,7 @@@ cover_db_html: cover_d
  # An example command to build against libFuzzer from LLVM 4.0.0:
  #
  # make CC=clang CXX=clang++ \
- #      FUZZ_CXXFLAGS="-fsanitize-coverage=trace-pc-guard -fsanitize=address" \
+ #      CFLAGS="-fsanitize-coverage=trace-pc-guard -fsanitize=address" \
  #      LIB_FUZZING_ENGINE=/usr/lib/llvm-4.0/lib/libFuzzer.a \
  #      fuzz-all
  #
diff --combined commit-graph.c
index 30f1781176612af2a6979635a9b5090d190990cd,359e782deed65ec5478c59b1f439adb90aabf124..18bd2b6df79e51105a17a1d2784608ba0370de36
  #define GRAPH_CHUNKID_DATA 0x43444154 /* "CDAT" */
  #define GRAPH_CHUNKID_LARGEEDGES 0x45444745 /* "EDGE" */
  
 -#define GRAPH_DATA_WIDTH 36
 +#define GRAPH_DATA_WIDTH (the_hash_algo->rawsz + 16)
  
  #define GRAPH_VERSION_1 0x1
  #define GRAPH_VERSION GRAPH_VERSION_1
  
 -#define GRAPH_OID_VERSION_SHA1 1
 -#define GRAPH_OID_LEN_SHA1 GIT_SHA1_RAWSZ
 -#define GRAPH_OID_VERSION GRAPH_OID_VERSION_SHA1
 -#define GRAPH_OID_LEN GRAPH_OID_LEN_SHA1
 -
  #define GRAPH_OCTOPUS_EDGES_NEEDED 0x80000000
 -#define GRAPH_PARENT_MISSING 0x7fffffff
  #define GRAPH_EDGE_LAST_MASK 0x7fffffff
  #define GRAPH_PARENT_NONE 0x70000000
  
  #define GRAPH_FANOUT_SIZE (4 * 256)
  #define GRAPH_CHUNKLOOKUP_WIDTH 12
  #define GRAPH_MIN_SIZE (GRAPH_HEADER_SIZE + 4 * GRAPH_CHUNKLOOKUP_WIDTH \
 -                      + GRAPH_FANOUT_SIZE + GRAPH_OID_LEN)
 +                      + GRAPH_FANOUT_SIZE + the_hash_algo->rawsz)
  
  char *get_commit_graph_filename(const char *obj_dir)
  {
        return xstrfmt("%s/info/commit-graph", obj_dir);
  }
  
 +static uint8_t oid_version(void)
 +{
 +      return 1;
 +}
 +
  static struct commit_graph *alloc_commit_graph(void)
  {
        struct commit_graph *g = xcalloc(1, sizeof(*g));
@@@ -83,16 -84,10 +83,10 @@@ static int commit_graph_compatible(stru
  struct commit_graph *load_commit_graph_one(const char *graph_file)
  {
        void *graph_map;
-       const unsigned char *data, *chunk_lookup;
        size_t graph_size;
        struct stat st;
-       uint32_t i;
-       struct commit_graph *graph;
+       struct commit_graph *ret;
        int fd = git_open(graph_file);
-       uint64_t last_chunk_offset;
-       uint32_t last_chunk_id;
-       uint32_t graph_signature;
-       unsigned char graph_version, hash_version;
  
        if (fd < 0)
                return NULL;
                die(_("graph file %s is too small"), graph_file);
        }
        graph_map = xmmap(NULL, graph_size, PROT_READ, MAP_PRIVATE, fd, 0);
+       ret = parse_commit_graph(graph_map, fd, graph_size);
+       if (!ret) {
+               munmap(graph_map, graph_size);
+               close(fd);
+               exit(1);
+       }
+       return ret;
+ }
+ struct commit_graph *parse_commit_graph(void *graph_map, int fd,
+                                       size_t graph_size)
+ {
+       const unsigned char *data, *chunk_lookup;
+       uint32_t i;
+       struct commit_graph *graph;
+       uint64_t last_chunk_offset;
+       uint32_t last_chunk_id;
+       uint32_t graph_signature;
+       unsigned char graph_version, hash_version;
+       if (!graph_map)
+               return NULL;
+       if (graph_size < GRAPH_MIN_SIZE)
+               return NULL;
        data = (const unsigned char *)graph_map;
  
        graph_signature = get_be32(data);
        if (graph_signature != GRAPH_SIGNATURE) {
                error(_("graph signature %X does not match signature %X"),
                      graph_signature, GRAPH_SIGNATURE);
-               goto cleanup_fail;
+               return NULL;
        }
  
        graph_version = *(unsigned char*)(data + 4);
        if (graph_version != GRAPH_VERSION) {
                error(_("graph version %X does not match version %X"),
                      graph_version, GRAPH_VERSION);
-               goto cleanup_fail;
+               return NULL;
        }
  
        hash_version = *(unsigned char*)(data + 5);
 -      if (hash_version != GRAPH_OID_VERSION) {
 +      if (hash_version != oid_version()) {
                error(_("hash version %X does not match version %X"),
 -                    hash_version, GRAPH_OID_VERSION);
 +                    hash_version, oid_version());
-               goto cleanup_fail;
+               return NULL;
        }
  
        graph = alloc_commit_graph();
  
 -      graph->hash_len = GRAPH_OID_LEN;
 +      graph->hash_len = the_hash_algo->rawsz;
        graph->num_chunks = *(unsigned char*)(data + 6);
        graph->graph_fd = fd;
        graph->data = graph_map;
        last_chunk_offset = 8;
        chunk_lookup = data + 8;
        for (i = 0; i < graph->num_chunks; i++) {
-               uint32_t chunk_id = get_be32(chunk_lookup + 0);
-               uint64_t chunk_offset = get_be64(chunk_lookup + 4);
+               uint32_t chunk_id;
+               uint64_t chunk_offset;
                int chunk_repeated = 0;
  
+               if (data + graph_size - chunk_lookup <
+                   GRAPH_CHUNKLOOKUP_WIDTH) {
+                       error(_("chunk lookup table entry missing; graph file may be incomplete"));
+                       free(graph);
+                       return NULL;
+               }
+               chunk_id = get_be32(chunk_lookup + 0);
+               chunk_offset = get_be64(chunk_lookup + 4);
                chunk_lookup += GRAPH_CHUNKLOOKUP_WIDTH;
  
 -              if (chunk_offset > graph_size - GIT_MAX_RAWSZ) {
 +              if (chunk_offset > graph_size - the_hash_algo->rawsz) {
                        error(_("improper chunk offset %08x%08x"), (uint32_t)(chunk_offset >> 32),
                              (uint32_t)chunk_offset);
-                       goto cleanup_fail;
+                       free(graph);
+                       return NULL;
                }
  
                switch (chunk_id) {
  
                if (chunk_repeated) {
                        error(_("chunk id %08x appears multiple times"), chunk_id);
-                       goto cleanup_fail;
+                       free(graph);
+                       return NULL;
                }
  
                if (last_chunk_id == GRAPH_CHUNKID_OIDLOOKUP)
        }
  
        return graph;
- cleanup_fail:
-       munmap(graph_map, graph_size);
-       close(fd);
-       exit(1);
  }
  
  static void prepare_commit_graph_one(struct repository *r, const char *obj_dir)
@@@ -288,8 -318,7 +317,8 @@@ static int bsearch_graph(struct commit_
                            g->chunk_oid_lookup, g->hash_len, pos);
  }
  
 -static struct commit_list **insert_parent_or_die(struct commit_graph *g,
 +static struct commit_list **insert_parent_or_die(struct repository *r,
 +                                               struct commit_graph *g,
                                                 uint64_t pos,
                                                 struct commit_list **pptr)
  {
                die("invalid parent position %"PRIu64, pos);
  
        hashcpy(oid.hash, g->chunk_oid_lookup + g->hash_len * pos);
 -      c = lookup_commit(the_repository, &oid);
 +      c = lookup_commit(r, &oid);
        if (!c)
                die(_("could not find commit %s"), oid_to_hex(&oid));
        c->graph_pos = pos;
@@@ -314,9 -343,7 +343,9 @@@ static void fill_commit_graph_info(stru
        item->generation = get_be32(commit_data + g->hash_len + 8) >> 2;
  }
  
 -static int fill_commit_in_graph(struct commit *item, struct commit_graph *g, uint32_t pos)
 +static int fill_commit_in_graph(struct repository *r,
 +                              struct commit *item,
 +                              struct commit_graph *g, uint32_t pos)
  {
        uint32_t edge_value;
        uint32_t *parent_data_ptr;
        edge_value = get_be32(commit_data + g->hash_len);
        if (edge_value == GRAPH_PARENT_NONE)
                return 1;
 -      pptr = insert_parent_or_die(g, edge_value, pptr);
 +      pptr = insert_parent_or_die(r, g, edge_value, pptr);
  
        edge_value = get_be32(commit_data + g->hash_len + 4);
        if (edge_value == GRAPH_PARENT_NONE)
                return 1;
        if (!(edge_value & GRAPH_OCTOPUS_EDGES_NEEDED)) {
 -              pptr = insert_parent_or_die(g, edge_value, pptr);
 +              pptr = insert_parent_or_die(r, g, edge_value, pptr);
                return 1;
        }
  
                          4 * (uint64_t)(edge_value & GRAPH_EDGE_LAST_MASK));
        do {
                edge_value = get_be32(parent_data_ptr);
 -              pptr = insert_parent_or_die(g,
 +              pptr = insert_parent_or_die(r, g,
                                            edge_value & GRAPH_EDGE_LAST_MASK,
                                            pptr);
                parent_data_ptr++;
@@@ -373,9 -400,7 +402,9 @@@ static int find_commit_in_graph(struct 
        }
  }
  
 -static int parse_commit_in_graph_one(struct commit_graph *g, struct commit *item)
 +static int parse_commit_in_graph_one(struct repository *r,
 +                                   struct commit_graph *g,
 +                                   struct commit *item)
  {
        uint32_t pos;
  
                return 1;
  
        if (find_commit_in_graph(item, g, &pos))
 -              return fill_commit_in_graph(item, g, pos);
 +              return fill_commit_in_graph(r, item, g, pos);
  
        return 0;
  }
@@@ -392,7 -417,7 +421,7 @@@ int parse_commit_in_graph(struct reposi
  {
        if (!prepare_commit_graph(r))
                return 0;
 -      return parse_commit_in_graph_one(r->objects->commit_graph, item);
 +      return parse_commit_in_graph_one(r, r->objects->commit_graph, item);
  }
  
  void load_commit_graph_info(struct repository *r, struct commit *item)
                fill_commit_graph_info(item, r->objects->commit_graph, pos);
  }
  
 -static struct tree *load_tree_for_commit(struct commit_graph *g, struct commit *c)
 +static struct tree *load_tree_for_commit(struct repository *r,
 +                                       struct commit_graph *g,
 +                                       struct commit *c)
  {
        struct object_id oid;
        const unsigned char *commit_data = g->chunk_commit_data +
                                           GRAPH_DATA_WIDTH * (c->graph_pos);
  
        hashcpy(oid.hash, commit_data);
 -      c->maybe_tree = lookup_tree(the_repository, &oid);
 +      c->maybe_tree = lookup_tree(r, &oid);
  
        return c->maybe_tree;
  }
  
 -static struct tree *get_commit_tree_in_graph_one(struct commit_graph *g,
 +static struct tree *get_commit_tree_in_graph_one(struct repository *r,
 +                                               struct commit_graph *g,
                                                 const struct commit *c)
  {
        if (c->maybe_tree)
        if (c->graph_pos == COMMIT_NOT_FROM_GRAPH)
                BUG("get_commit_tree_in_graph_one called from non-commit-graph commit");
  
 -      return load_tree_for_commit(g, (struct commit *)c);
 +      return load_tree_for_commit(r, g, (struct commit *)c);
  }
  
  struct tree *get_commit_tree_in_graph(struct repository *r, const struct commit *c)
  {
 -      return get_commit_tree_in_graph_one(r->objects->commit_graph, c);
 +      return get_commit_tree_in_graph_one(r, r->objects->commit_graph, c);
  }
  
  static void write_graph_chunk_fanout(struct hashfile *f,
@@@ -500,9 -522,7 +529,9 @@@ static void write_graph_chunk_data(stru
                                              commit_to_sha1);
  
                        if (edge_value < 0)
 -                              edge_value = GRAPH_PARENT_MISSING;
 +                              BUG("missing parent %s for commit %s",
 +                                  oid_to_hex(&parent->item->object.oid),
 +                                  oid_to_hex(&(*list)->object.oid));
                }
  
                hashwrite_be32(f, edge_value);
                                              nr_commits,
                                              commit_to_sha1);
                        if (edge_value < 0)
 -                              edge_value = GRAPH_PARENT_MISSING;
 +                              BUG("missing parent %s for commit %s",
 +                                  oid_to_hex(&parent->item->object.oid),
 +                                  oid_to_hex(&(*list)->object.oid));
                }
  
                hashwrite_be32(f, edge_value);
@@@ -575,9 -593,7 +604,9 @@@ static void write_graph_chunk_large_edg
                                                  commit_to_sha1);
  
                        if (edge_value < 0)
 -                              edge_value = GRAPH_PARENT_MISSING;
 +                              BUG("missing parent %s for commit %s",
 +                                  oid_to_hex(&parent->item->object.oid),
 +                                  oid_to_hex(&(*list)->object.oid));
                        else if (!parent->next)
                                edge_value |= GRAPH_LAST_EDGE;
  
@@@ -781,7 -797,6 +810,7 @@@ void write_commit_graph(const char *obj
        int num_extra_edges;
        struct commit_list *parent;
        struct progress *progress = NULL;
 +      const unsigned hashsz = the_hash_algo->rawsz;
  
        if (!commit_graph_compatible(the_repository))
                return;
                        count_distinct++;
        }
  
 -      if (count_distinct >= GRAPH_PARENT_MISSING)
 +      if (count_distinct >= GRAPH_EDGE_LAST_MASK)
                die(_("the commit graph format cannot write %d commits"), count_distinct);
  
        commits.nr = 0;
        }
        num_chunks = num_extra_edges ? 4 : 3;
  
 -      if (commits.nr >= GRAPH_PARENT_MISSING)
 +      if (commits.nr >= GRAPH_EDGE_LAST_MASK)
                die(_("too many commits to write graph"));
  
        compute_generation_numbers(&commits, report_progress);
        hashwrite_be32(f, GRAPH_SIGNATURE);
  
        hashwrite_u8(f, GRAPH_VERSION);
 -      hashwrite_u8(f, GRAPH_OID_VERSION);
 +      hashwrite_u8(f, oid_version());
        hashwrite_u8(f, num_chunks);
        hashwrite_u8(f, 0); /* unused padding byte */
  
  
        chunk_offsets[0] = 8 + (num_chunks + 1) * GRAPH_CHUNKLOOKUP_WIDTH;
        chunk_offsets[1] = chunk_offsets[0] + GRAPH_FANOUT_SIZE;
 -      chunk_offsets[2] = chunk_offsets[1] + GRAPH_OID_LEN * commits.nr;
 -      chunk_offsets[3] = chunk_offsets[2] + (GRAPH_OID_LEN + 16) * commits.nr;
 +      chunk_offsets[2] = chunk_offsets[1] + hashsz * commits.nr;
 +      chunk_offsets[3] = chunk_offsets[2] + (hashsz + 16) * commits.nr;
        chunk_offsets[4] = chunk_offsets[3] + 4 * num_extra_edges;
  
        for (i = 0; i <= num_chunks; i++) {
        }
  
        write_graph_chunk_fanout(f, commits.list, commits.nr);
 -      write_graph_chunk_oids(f, GRAPH_OID_LEN, commits.list, commits.nr);
 -      write_graph_chunk_data(f, GRAPH_OID_LEN, commits.list, commits.nr);
 +      write_graph_chunk_oids(f, hashsz, commits.list, commits.nr);
 +      write_graph_chunk_data(f, hashsz, commits.list, commits.nr);
        write_graph_chunk_large_edges(f, commits.list, commits.nr);
  
        close_commit_graph(the_repository);
@@@ -1043,7 -1058,7 +1072,7 @@@ int verify_commit_graph(struct reposito
                }
  
                graph_commit = lookup_commit(r, &cur_oid);
 -              if (!parse_commit_in_graph_one(g, graph_commit))
 +              if (!parse_commit_in_graph_one(r, g, graph_commit))
                        graph_report("failed to parse %s from commit-graph",
                                     oid_to_hex(&cur_oid));
        }
                        continue;
                }
  
 -              if (!oideq(&get_commit_tree_in_graph_one(g, graph_commit)->object.oid,
 +              if (!oideq(&get_commit_tree_in_graph_one(r, g, graph_commit)->object.oid,
                           get_commit_tree_oid(odb_commit)))
                        graph_report("root tree OID for commit %s in commit-graph is %s != %s",
                                     oid_to_hex(&cur_oid),