Merge branch 'jk/maint-diff-grep-textconv'
[gitweb.git] / builtin / index-pack.c
index 470547835ca41888817b85dc4b7ed39273ede784..43d364b8d5e5b0cb4b78be517e78d64c96da4304 100644 (file)
@@ -9,6 +9,7 @@
 #include "progress.h"
 #include "fsck.h"
 #include "exec_cmd.h"
+#include "streaming.h"
 #include "thread-utils.h"
 
 static const char index_pack_usage[] =
@@ -290,7 +291,7 @@ static void parse_pack_header(void)
        if (hdr->hdr_signature != htonl(PACK_SIGNATURE))
                die(_("pack signature mismatch"));
        if (!pack_version_ok(hdr->hdr_version))
-               die("pack version %"PRIu32" unsupported",
+               die(_("pack version %"PRIu32" unsupported"),
                        ntohl(hdr->hdr_version));
 
        nr_objects = ntohl(hdr->hdr_entries);
@@ -384,30 +385,62 @@ static void unlink_base_data(struct base_data *c)
        free_base_data(c);
 }
 
-static void *unpack_entry_data(unsigned long offset, unsigned long size)
+static int is_delta_type(enum object_type type)
+{
+       return (type == OBJ_REF_DELTA || type == OBJ_OFS_DELTA);
+}
+
+static void *unpack_entry_data(unsigned long offset, unsigned long size,
+                              enum object_type type, unsigned char *sha1)
 {
+       static char fixed_buf[8192];
        int status;
        git_zstream stream;
-       void *buf = xmalloc(size);
+       void *buf;
+       git_SHA_CTX c;
+       char hdr[32];
+       int hdrlen;
+
+       if (!is_delta_type(type)) {
+               hdrlen = sprintf(hdr, "%s %lu", typename(type), size) + 1;
+               git_SHA1_Init(&c);
+               git_SHA1_Update(&c, hdr, hdrlen);
+       } else
+               sha1 = NULL;
+       if (type == OBJ_BLOB && size > big_file_threshold)
+               buf = fixed_buf;
+       else
+               buf = xmalloc(size);
 
        memset(&stream, 0, sizeof(stream));
        git_inflate_init(&stream);
        stream.next_out = buf;
-       stream.avail_out = size;
+       stream.avail_out = buf == fixed_buf ? sizeof(fixed_buf) : size;
 
        do {
+               unsigned char *last_out = stream.next_out;
                stream.next_in = fill(1);
                stream.avail_in = input_len;
                status = git_inflate(&stream, 0);
                use(input_len - stream.avail_in);
+               if (sha1)
+                       git_SHA1_Update(&c, last_out, stream.next_out - last_out);
+               if (buf == fixed_buf) {
+                       stream.next_out = buf;
+                       stream.avail_out = sizeof(fixed_buf);
+               }
        } while (status == Z_OK);
        if (stream.total_out != size || status != Z_STREAM_END)
                bad_object(offset, _("inflate returned %d"), status);
        git_inflate_end(&stream);
-       return buf;
+       if (sha1)
+               git_SHA1_Final(sha1, &c);
+       return buf == fixed_buf ? NULL : buf;
 }
 
-static void *unpack_raw_entry(struct object_entry *obj, union delta_base *delta_base)
+static void *unpack_raw_entry(struct object_entry *obj,
+                             union delta_base *delta_base,
+                             unsigned char *sha1)
 {
        unsigned char *p;
        unsigned long size, c;
@@ -467,12 +500,14 @@ static void *unpack_raw_entry(struct object_entry *obj, union delta_base *delta_
        }
        obj->hdr_size = consumed_bytes - obj->idx.offset;
 
-       data = unpack_entry_data(obj->idx.offset, obj->size);
+       data = unpack_entry_data(obj->idx.offset, obj->size, obj->type, sha1);
        obj->idx.crc32 = input_crc32;
        return data;
 }
 
-static void *get_data_from_pack(struct object_entry *obj)
+static void *unpack_data(struct object_entry *obj,
+                        int (*consume)(const unsigned char *, unsigned long, void *),
+                        void *cb_data)
 {
        off_t from = obj[0].idx.offset + obj[0].hdr_size;
        unsigned long len = obj[1].idx.offset - from;
@@ -480,13 +515,13 @@ static void *get_data_from_pack(struct object_entry *obj)
        git_zstream stream;
        int status;
 
-       data = xmalloc(obj->size);
+       data = xmalloc(consume ? 64*1024 : obj->size);
        inbuf = xmalloc((len < 64*1024) ? len : 64*1024);
 
        memset(&stream, 0, sizeof(stream));
        git_inflate_init(&stream);
        stream.next_out = data;
-       stream.avail_out = obj->size;
+       stream.avail_out = consume ? 64*1024 : obj->size;
 
        do {
                ssize_t n = (len < 64*1024) ? len : 64*1024;
@@ -502,7 +537,20 @@ static void *get_data_from_pack(struct object_entry *obj)
                len -= n;
                stream.next_in = inbuf;
                stream.avail_in = n;
-               status = git_inflate(&stream, 0);
+               if (!consume)
+                       status = git_inflate(&stream, 0);
+               else {
+                       do {
+                               status = git_inflate(&stream, 0);
+                               if (consume(data, stream.next_out - data, cb_data)) {
+                                       free(inbuf);
+                                       free(data);
+                                       return NULL;
+                               }
+                               stream.next_out = data;
+                               stream.avail_out = 64*1024;
+                       } while (status == Z_OK && stream.avail_in);
+               }
        } while (len && status == Z_OK && !stream.avail_in);
 
        /* This has been inflated OK when first encountered, so... */
@@ -511,9 +559,18 @@ static void *get_data_from_pack(struct object_entry *obj)
 
        git_inflate_end(&stream);
        free(inbuf);
+       if (consume) {
+               free(data);
+               data = NULL;
+       }
        return data;
 }
 
+static void *get_data_from_pack(struct object_entry *obj)
+{
+       return unpack_data(obj, NULL, NULL);
+}
+
 static int compare_delta_bases(const union delta_base *base1,
                               const union delta_base *base2,
                               enum object_type type1,
@@ -568,25 +625,102 @@ static void find_delta_children(const union delta_base *base,
        *last_index = last;
 }
 
-static void sha1_object(const void *data, unsigned long size,
-                       enum object_type type, unsigned char *sha1)
+struct compare_data {
+       struct object_entry *entry;
+       struct git_istream *st;
+       unsigned char *buf;
+       unsigned long buf_size;
+};
+
+static int compare_objects(const unsigned char *buf, unsigned long size,
+                          void *cb_data)
+{
+       struct compare_data *data = cb_data;
+
+       if (data->buf_size < size) {
+               free(data->buf);
+               data->buf = xmalloc(size);
+               data->buf_size = size;
+       }
+
+       while (size) {
+               ssize_t len = read_istream(data->st, data->buf, size);
+               if (len == 0)
+                       die(_("SHA1 COLLISION FOUND WITH %s !"),
+                           sha1_to_hex(data->entry->idx.sha1));
+               if (len < 0)
+                       die(_("unable to read %s"),
+                           sha1_to_hex(data->entry->idx.sha1));
+               if (memcmp(buf, data->buf, len))
+                       die(_("SHA1 COLLISION FOUND WITH %s !"),
+                           sha1_to_hex(data->entry->idx.sha1));
+               size -= len;
+               buf += len;
+       }
+       return 0;
+}
+
+static int check_collison(struct object_entry *entry)
+{
+       struct compare_data data;
+       enum object_type type;
+       unsigned long size;
+
+       if (entry->size <= big_file_threshold || entry->type != OBJ_BLOB)
+               return -1;
+
+       memset(&data, 0, sizeof(data));
+       data.entry = entry;
+       data.st = open_istream(entry->idx.sha1, &type, &size, NULL);
+       if (!data.st)
+               return -1;
+       if (size != entry->size || type != entry->type)
+               die(_("SHA1 COLLISION FOUND WITH %s !"),
+                   sha1_to_hex(entry->idx.sha1));
+       unpack_data(entry, compare_objects, &data);
+       close_istream(data.st);
+       free(data.buf);
+       return 0;
+}
+
+static void sha1_object(const void *data, struct object_entry *obj_entry,
+                       unsigned long size, enum object_type type,
+                       const unsigned char *sha1)
 {
-       hash_sha1_file(data, size, typename(type), sha1);
+       void *new_data = NULL;
+       int collision_test_needed;
+
+       assert(data || obj_entry);
+
        read_lock();
-       if (has_sha1_file(sha1)) {
+       collision_test_needed = has_sha1_file(sha1);
+       read_unlock();
+
+       if (collision_test_needed && !data) {
+               read_lock();
+               if (!check_collison(obj_entry))
+                       collision_test_needed = 0;
+               read_unlock();
+       }
+       if (collision_test_needed) {
                void *has_data;
                enum object_type has_type;
                unsigned long has_size;
+               read_lock();
+               has_type = sha1_object_info(sha1, &has_size);
+               if (has_type != type || has_size != size)
+                       die(_("SHA1 COLLISION FOUND WITH %s !"), sha1_to_hex(sha1));
                has_data = read_sha1_file(sha1, &has_type, &has_size);
                read_unlock();
+               if (!data)
+                       data = new_data = get_data_from_pack(obj_entry);
                if (!has_data)
                        die(_("cannot read existing object %s"), sha1_to_hex(sha1));
                if (size != has_size || type != has_type ||
                    memcmp(data, has_data, size) != 0)
                        die(_("SHA1 COLLISION FOUND WITH %s !"), sha1_to_hex(sha1));
                free(has_data);
-       } else
-               read_unlock();
+       }
 
        if (strict) {
                read_lock();
@@ -601,6 +735,9 @@ static void sha1_object(const void *data, unsigned long size,
                        int eaten;
                        void *buf = (void *) data;
 
+                       if (!buf)
+                               buf = new_data = get_data_from_pack(obj_entry);
+
                        /*
                         * we do not need to free the memory here, as the
                         * buf is deleted by the caller.
@@ -625,11 +762,8 @@ static void sha1_object(const void *data, unsigned long size,
                }
                read_unlock();
        }
-}
 
-static int is_delta_type(enum object_type type)
-{
-       return (type == OBJ_REF_DELTA || type == OBJ_OFS_DELTA);
+       free(new_data);
 }
 
 /*
@@ -711,7 +845,9 @@ static void resolve_delta(struct object_entry *delta_obj,
        free(delta_data);
        if (!result->data)
                bad_object(delta_obj->idx.offset, _("failed to apply delta"));
-       sha1_object(result->data, result->size, delta_obj->real_type,
+       hash_sha1_file(result->data, result->size,
+                      typename(delta_obj->real_type), delta_obj->idx.sha1);
+       sha1_object(result->data, NULL, result->size, delta_obj->real_type,
                    delta_obj->idx.sha1);
        counter_lock();
        nr_resolved_deltas++;
@@ -841,7 +977,7 @@ static void *threaded_second_pass(void *data)
  */
 static void parse_pack_objects(unsigned char *sha1)
 {
-       int i;
+       int i, nr_delays = 0;
        struct delta_entry *delta = deltas;
        struct stat st;
 
@@ -851,14 +987,18 @@ static void parse_pack_objects(unsigned char *sha1)
                                nr_objects);
        for (i = 0; i < nr_objects; i++) {
                struct object_entry *obj = &objects[i];
-               void *data = unpack_raw_entry(obj, &delta->base);
+               void *data = unpack_raw_entry(obj, &delta->base, obj->idx.sha1);
                obj->real_type = obj->type;
                if (is_delta_type(obj->type)) {
                        nr_deltas++;
                        delta->obj_no = i;
                        delta++;
+               } else if (!data) {
+                       /* large blobs, check later */
+                       obj->real_type = OBJ_BAD;
+                       nr_delays++;
                } else
-                       sha1_object(data, obj->size, obj->type, obj->idx.sha1);
+                       sha1_object(data, NULL, obj->size, obj->type, obj->idx.sha1);
                free(data);
                display_progress(progress, i+1);
        }
@@ -878,6 +1018,17 @@ static void parse_pack_objects(unsigned char *sha1)
        if (S_ISREG(st.st_mode) &&
                        lseek(input_fd, 0, SEEK_CUR) - input_len != st.st_size)
                die(_("pack has junk at the end"));
+
+       for (i = 0; i < nr_objects; i++) {
+               struct object_entry *obj = &objects[i];
+               if (obj->real_type != OBJ_BAD)
+                       continue;
+               obj->real_type = obj->type;
+               sha1_object(NULL, obj, obj->size, obj->type, obj->idx.sha1);
+               nr_delays--;
+       }
+       if (nr_delays)
+               die(_("confusion beyond insanity in parse_pack_objects()"));
 }
 
 /*
@@ -910,7 +1061,8 @@ static void resolve_deltas(void)
                        int ret = pthread_create(&thread_data[i].thread, NULL,
                                                 threaded_second_pass, thread_data + i);
                        if (ret)
-                               die("unable to create thread: %s", strerror(ret));
+                               die(_("unable to create thread: %s"),
+                                   strerror(ret));
                }
                for (i = 0; i < nr_threads; i++)
                        pthread_join(thread_data[i].thread, NULL);
@@ -957,7 +1109,7 @@ static void conclude_pack(int fix_thin_pack, const char *curr_pack, unsigned cha
                                   * sizeof(*objects));
                f = sha1fd(output_fd, curr_pack);
                fix_unresolved_deltas(f, nr_unresolved);
-               sprintf(msg, "completed with %d local objects",
+               sprintf(msg, _("completed with %d local objects"),
                        nr_objects - nr_objects_initial);
                stop_progress_msg(&progress, msg);
                sha1close(f, tail_sha1, 0);
@@ -966,8 +1118,8 @@ static void conclude_pack(int fix_thin_pack, const char *curr_pack, unsigned cha
                                         curr_pack, nr_objects,
                                         read_sha1, consumed_bytes-20);
                if (hashcmp(read_sha1, tail_sha1) != 0)
-                       die("Unexpected tail checksum for %s "
-                           "(disk corruption?)", curr_pack);
+                       die(_("Unexpected tail checksum for %s "
+                             "(disk corruption?)"), curr_pack);
        }
        if (nr_deltas != nr_resolved_deltas)
                die(Q_("pack has %d unresolved delta",
@@ -1176,17 +1328,17 @@ static int git_index_pack_config(const char *k, const char *v, void *cb)
        if (!strcmp(k, "pack.indexversion")) {
                opts->version = git_config_int(k, v);
                if (opts->version > 2)
-                       die("bad pack.indexversion=%"PRIu32, opts->version);
+                       die(_("bad pack.indexversion=%"PRIu32), opts->version);
                return 0;
        }
        if (!strcmp(k, "pack.threads")) {
                nr_threads = git_config_int(k, v);
                if (nr_threads < 0)
-                       die("invalid number of threads specified (%d)",
+                       die(_("invalid number of threads specified (%d)"),
                            nr_threads);
 #ifdef NO_PTHREADS
                if (nr_threads != 1)
-                       warning("no threads support, ignoring %s", k);
+                       warning(_("no threads support, ignoring %s"), k);
                nr_threads = 1;
 #endif
                return 0;
@@ -1359,8 +1511,8 @@ int cmd_index_pack(int argc, const char **argv, const char *prefix)
                                        usage(index_pack_usage);
 #ifdef NO_PTHREADS
                                if (nr_threads != 1)
-                                       warning("no threads support, "
-                                               "ignoring %s", arg);
+                                       warning(_("no threads support, "
+                                                 "ignoring %s"), arg);
                                nr_threads = 1;
 #endif
                        } else if (!prefixcmp(arg, "--pack_header=")) {