Merge branch 'nd/pack-ofs-4gb-limit'
authorJunio C Hamano <gitster@pobox.com>
Thu, 28 Jul 2016 17:34:42 +0000 (10:34 -0700)
committerJunio C Hamano <gitster@pobox.com>
Thu, 28 Jul 2016 17:34:42 +0000 (10:34 -0700)
"git pack-objects" and "git index-pack" mostly operate with off_t
when talking about the offset of objects in a packfile, but there
were a handful of places that used "unsigned long" to hold that
value, leading to an unintended truncation.

* nd/pack-ofs-4gb-limit:
fsck: use streaming interface for large blobs in pack
pack-objects: do not truncate result in-pack object size on 32-bit systems
index-pack: correct "offset" type in unpack_entry_data()
index-pack: report correct bad object offsets even if they are large
index-pack: correct "len" type in unpack_data()
sha1_file.c: use type off_t* for object_info->disk_sizep
pack-objects: pass length to check_pack_crc() without truncation

builtin/cat-file.c
builtin/fsck.c
builtin/index-pack.c
builtin/pack-objects.c
cache.h
pack-check.c
pack.h
sha1_file.c
t/t1050-large.sh
index 618103fdeeb7f7b35911ebd12d0d811cc4a5297a..2dfe6265f7df6099b51645fa67dbeeb3f4f567a4 100644 (file)
@@ -131,7 +131,7 @@ struct expand_data {
        unsigned char sha1[20];
        enum object_type type;
        unsigned long size;
-       unsigned long disk_size;
+       off_t disk_size;
        const char *rest;
        unsigned char delta_base_sha1[20];
 
@@ -191,7 +191,7 @@ static void expand_atom(struct strbuf *sb, const char *atom, int len,
                if (data->mark_query)
                        data->info.disk_sizep = &data->disk_size;
                else
-                       strbuf_addf(sb, "%lu", data->disk_size);
+                       strbuf_addf(sb, "%"PRIuMAX, (uintmax_t)data->disk_size);
        } else if (is_atom("rest", atom, len)) {
                if (data->mark_query)
                        data->split_on_whitespace = 1;
index c6d17e63fd832d5b37ea93e523cfb774b563c452..2de272ea3659411b58d38d703a48790d55d6a12b 100644 (file)
@@ -377,6 +377,10 @@ static int fsck_sha1(const unsigned char *sha1)
 static int fsck_obj_buffer(const unsigned char *sha1, enum object_type type,
                           unsigned long size, void *buffer, int *eaten)
 {
+       /*
+        * Note, buffer may be NULL if type is OBJ_BLOB. See
+        * verify_packfile(), data_valid variable for details.
+        */
        struct object *obj;
        obj = parse_object_buffer(sha1, type, size, buffer, eaten);
        if (!obj) {
index e8c71fc1d2e44ef9bd93c37ff714eeefc53664cf..1008d7f63cab33b162da43f5a8b13cd6e1aea88b 100644 (file)
@@ -338,10 +338,10 @@ static void parse_pack_header(void)
        use(sizeof(struct pack_header));
 }
 
-static NORETURN void bad_object(unsigned long offset, const char *format,
+static NORETURN void bad_object(off_t offset, const char *format,
                       ...) __attribute__((format (printf, 2, 3)));
 
-static NORETURN void bad_object(unsigned long offset, const char *format, ...)
+static NORETURN void bad_object(off_t offset, const char *format, ...)
 {
        va_list params;
        char buf[1024];
@@ -349,7 +349,8 @@ static NORETURN void bad_object(unsigned long offset, const char *format, ...)
        va_start(params, format);
        vsnprintf(buf, sizeof(buf), format, params);
        va_end(params);
-       die(_("pack has bad object at offset %lu: %s"), offset, buf);
+       die(_("pack has bad object at offset %"PRIuMAX": %s"),
+           (uintmax_t)offset, buf);
 }
 
 static inline struct thread_local *get_thread_data(void)
@@ -429,7 +430,7 @@ static int is_delta_type(enum object_type type)
        return (type == OBJ_REF_DELTA || type == OBJ_OFS_DELTA);
 }
 
-static void *unpack_entry_data(unsigned long offset, unsigned long size,
+static void *unpack_entry_data(off_t offset, unsigned long size,
                               enum object_type type, unsigned char *sha1)
 {
        static char fixed_buf[8192];
@@ -549,13 +550,13 @@ static void *unpack_data(struct object_entry *obj,
                         void *cb_data)
 {
        off_t from = obj[0].idx.offset + obj[0].hdr_size;
-       unsigned long len = obj[1].idx.offset - from;
+       off_t len = obj[1].idx.offset - from;
        unsigned char *data, *inbuf;
        git_zstream stream;
        int status;
 
        data = xmallocz(consume ? 64*1024 : obj->size);
-       inbuf = xmalloc((len < 64*1024) ? len : 64*1024);
+       inbuf = xmalloc((len < 64*1024) ? (int)len : 64*1024);
 
        memset(&stream, 0, sizeof(stream));
        git_inflate_init(&stream);
@@ -563,15 +564,15 @@ static void *unpack_data(struct object_entry *obj,
        stream.avail_out = consume ? 64*1024 : obj->size;
 
        do {
-               ssize_t n = (len < 64*1024) ? len : 64*1024;
+               ssize_t n = (len < 64*1024) ? (ssize_t)len : 64*1024;
                n = xpread(get_thread_data()->pack_fd, inbuf, n, from);
                if (n < 0)
                        die_errno(_("cannot pread pack file"));
                if (!n)
-                       die(Q_("premature end of pack file, %lu byte missing",
-                              "premature end of pack file, %lu bytes missing",
-                              len),
-                           len);
+                       die(Q_("premature end of pack file, %"PRIuMAX" byte missing",
+                              "premature end of pack file, %"PRIuMAX" bytes missing",
+                              (unsigned int)len),
+                           (uintmax_t)len);
                from += n;
                len -= n;
                stream.next_in = inbuf;
index a2f8cfdec0d4034c20f3307c390cec9dec6d4a18..92e2e5f7a8190c546f2c0cc9bc9724a04d330052 100644 (file)
@@ -342,15 +342,15 @@ static unsigned long write_no_reuse_object(struct sha1file *f, struct object_ent
 }
 
 /* Return 0 if we will bust the pack-size limit */
-static unsigned long write_reuse_object(struct sha1file *f, struct object_entry *entry,
-                                       unsigned long limit, int usable_delta)
+static off_t write_reuse_object(struct sha1file *f, struct object_entry *entry,
+                               unsigned long limit, int usable_delta)
 {
        struct packed_git *p = entry->in_pack;
        struct pack_window *w_curs = NULL;
        struct revindex_entry *revidx;
        off_t offset;
        enum object_type type = entry->type;
-       unsigned long datalen;
+       off_t datalen;
        unsigned char header[10], dheader[10];
        unsigned hdrlen;
 
@@ -416,11 +416,12 @@ static unsigned long write_reuse_object(struct sha1file *f, struct object_entry
 }
 
 /* Return 0 if we will bust the pack-size limit */
-static unsigned long write_object(struct sha1file *f,
-                                 struct object_entry *entry,
-                                 off_t write_offset)
+static off_t write_object(struct sha1file *f,
+                         struct object_entry *entry,
+                         off_t write_offset)
 {
-       unsigned long limit, len;
+       unsigned long limit;
+       off_t len;
        int usable_delta, to_reuse;
 
        if (!pack_to_stdout)
@@ -492,7 +493,7 @@ static enum write_one_status write_one(struct sha1file *f,
                                       struct object_entry *e,
                                       off_t *offset)
 {
-       unsigned long size;
+       off_t size;
        int recursing;
 
        /*
diff --git a/cache.h b/cache.h
index 3855ddfbe659569a2156b8783d53f7838b6a940f..b5f76a4cf4b1599f024f14553dd8c99a63abf77c 100644 (file)
--- a/cache.h
+++ b/cache.h
@@ -1515,7 +1515,7 @@ struct object_info {
        /* Request */
        enum object_type *typep;
        unsigned long *sizep;
-       unsigned long *disk_sizep;
+       off_t *disk_sizep;
        unsigned char *delta_base_sha1;
        struct strbuf *typename;
 
index 1da89a41cec9e605fc3fdfa8efaaf2489e501c88..d123846ea2be7c34360049864aeb3a88f505dfd1 100644 (file)
@@ -105,6 +105,8 @@ static int verify_packfile(struct packed_git *p,
                void *data;
                enum object_type type;
                unsigned long size;
+               off_t curpos;
+               int data_valid;
 
                if (p->index_version > 1) {
                        off_t offset = entries[i].offset;
@@ -116,8 +118,25 @@ static int verify_packfile(struct packed_git *p,
                                            sha1_to_hex(entries[i].sha1),
                                            p->pack_name, (uintmax_t)offset);
                }
-               data = unpack_entry(p, entries[i].offset, &type, &size);
-               if (!data)
+
+               curpos = entries[i].offset;
+               type = unpack_object_header(p, w_curs, &curpos, &size);
+               unuse_pack(w_curs);
+
+               if (type == OBJ_BLOB && big_file_threshold <= size) {
+                       /*
+                        * Let check_sha1_signature() check it with
+                        * the streaming interface; no point slurping
+                        * the data in-core only to discard.
+                        */
+                       data = NULL;
+                       data_valid = 0;
+               } else {
+                       data = unpack_entry(p, entries[i].offset, &type, &size);
+                       data_valid = 1;
+               }
+
+               if (data_valid && !data)
                        err = error("cannot unpack %s from %s at offset %"PRIuMAX"",
                                    sha1_to_hex(entries[i].sha1), p->pack_name,
                                    (uintmax_t)entries[i].offset);
diff --git a/pack.h b/pack.h
index 3223f5a0380f735509424bbdbad64097948ef85b..0e77429df5e53a753271843aa8abc16f38708ccc 100644 (file)
--- a/pack.h
+++ b/pack.h
@@ -74,6 +74,7 @@ struct pack_idx_entry {
 
 
 struct progress;
+/* Note, the data argument could be NULL if object type is blob */
 typedef int (*verify_fn)(const unsigned char*, enum object_type, unsigned long, void*, int*);
 
 extern const char *write_idx_file(const char *index_name, struct pack_idx_entry **objects, int nr_objects, const struct pack_idx_option *, const unsigned char *sha1);
index d5e11217f523018008b3c4861d5d70068de14c72..cb571ac6e8ed0657e39b346b41961caa8cc825be 100644 (file)
@@ -2281,7 +2281,7 @@ void *unpack_entry(struct packed_git *p, off_t obj_offset,
 
                if (do_check_packed_object_crc && p->index_version > 1) {
                        struct revindex_entry *revidx = find_pack_revindex(p, obj_offset);
-                       unsigned long len = revidx[1].offset - obj_offset;
+                       off_t len = revidx[1].offset - obj_offset;
                        if (check_pack_crc(p, &w_curs, obj_offset, len, revidx->nr)) {
                                const unsigned char *sha1 =
                                        nth_packed_object_sha1(p, revidx->nr);
index f9f3d1391ff496da38e0ed25780d1fdae173c91d..096dbffecc3d51478b643bd2f4dee92f507e1c1a 100755 (executable)
@@ -177,10 +177,9 @@ test_expect_success 'zip achiving, deflate' '
        git archive --format=zip HEAD >/dev/null
 '
 
-test_expect_success 'fsck' '
-       test_must_fail git fsck 2>err &&
-       n=$(grep "error: attempting to allocate .* over limit" err | wc -l) &&
-       test "$n" -gt 1
+test_expect_success 'fsck large blobs' '
+       git fsck 2>err &&
+       test_must_be_empty err
 '
 
 test_done