pack-redundant: consistent sort method
[gitweb.git] / packfile.c
index ebcb5742ec748d730f8d730ad8b0744e9094d121..8c6b47cc777708d4f8e450337f2b9d205f43591b 100644 (file)
@@ -15,6 +15,7 @@
 #include "tree-walk.h"
 #include "tree.h"
 #include "object-store.h"
+#include "midx.h"
 
 char *odb_pack_name(struct strbuf *buf,
                    const unsigned char *sha1,
@@ -79,10 +80,8 @@ void pack_report(void)
 static int check_packed_git_idx(const char *path, struct packed_git *p)
 {
        void *idx_map;
-       struct pack_idx_header *hdr;
        size_t idx_size;
-       uint32_t version, nr, i, *index;
-       int fd = git_open(path);
+       int fd = git_open(path), ret;
        struct stat st;
        const unsigned int hashsz = the_hash_algo->rawsz;
 
@@ -100,16 +99,32 @@ static int check_packed_git_idx(const char *path, struct packed_git *p)
        idx_map = xmmap(NULL, idx_size, PROT_READ, MAP_PRIVATE, fd, 0);
        close(fd);
 
-       hdr = idx_map;
+       ret = load_idx(path, hashsz, idx_map, idx_size, p);
+
+       if (ret)
+               munmap(idx_map, idx_size);
+
+       return ret;
+}
+
+int load_idx(const char *path, const unsigned int hashsz, void *idx_map,
+            size_t idx_size, struct packed_git *p)
+{
+       struct pack_idx_header *hdr = idx_map;
+       uint32_t version, nr, i, *index;
+
+       if (idx_size < 4 * 256 + hashsz + hashsz)
+               return error("index file %s is too small", path);
+       if (idx_map == NULL)
+               return error("empty data");
+
        if (hdr->idx_signature == htonl(PACK_IDX_SIGNATURE)) {
                version = ntohl(hdr->idx_version);
-               if (version < 2 || version > 2) {
-                       munmap(idx_map, idx_size);
+               if (version < 2 || version > 2)
                        return error("index file %s is version %"PRIu32
                                     " and is not supported by this binary"
                                     " (try upgrading GIT to a newer version)",
                                     path, version);
-               }
        } else
                version = 1;
 
@@ -119,10 +134,8 @@ static int check_packed_git_idx(const char *path, struct packed_git *p)
                index += 2;  /* skip index header */
        for (i = 0; i < 256; i++) {
                uint32_t n = ntohl(index[i]);
-               if (n < nr) {
-                       munmap(idx_map, idx_size);
+               if (n < nr)
                        return error("non-monotonic index %s", path);
-               }
                nr = n;
        }
 
@@ -134,10 +147,8 @@ static int check_packed_git_idx(const char *path, struct packed_git *p)
                 *  - hash of the packfile
                 *  - file checksum
                 */
-               if (idx_size != 4*256 + nr * (hashsz + 4) + hashsz + hashsz) {
-                       munmap(idx_map, idx_size);
+               if (idx_size != 4 * 256 + nr * (hashsz + 4) + hashsz + hashsz)
                        return error("wrong index v1 file size in %s", path);
-               }
        } else if (version == 2) {
                /*
                 * Minimum size:
@@ -156,20 +167,16 @@ static int check_packed_git_idx(const char *path, struct packed_git *p)
                unsigned long max_size = min_size;
                if (nr)
                        max_size += (nr - 1)*8;
-               if (idx_size < min_size || idx_size > max_size) {
-                       munmap(idx_map, idx_size);
+               if (idx_size < min_size || idx_size > max_size)
                        return error("wrong index v2 file size in %s", path);
-               }
                if (idx_size != min_size &&
                    /*
                     * make sure we can deal with large pack offsets.
                     * 31-bit signed offset won't be enough, neither
                     * 32-bit unsigned one will be.
                     */
-                   (sizeof(off_t) <= 4)) {
-                       munmap(idx_map, idx_size);
+                   (sizeof(off_t) <= 4))
                        return error("pack too large for current definition of off_t in %s", path);
-               }
        }
 
        p->index_version = version;
@@ -196,6 +203,23 @@ int open_pack_index(struct packed_git *p)
        return ret;
 }
 
+/*
+ * Return entry "value" of the first-level fanout table found in the
+ * index of pack "p", converted to host byte order.
+ *
+ * The pack index is opened on demand when it has not been loaded yet;
+ * 0 is returned if open_pack_index() reports failure. "value" is used
+ * as an array subscript without any range check.
+ */
+uint32_t get_pack_fanout(struct packed_git *p, uint32_t value)
+{
+       const uint32_t *fanout;
+
+       if (!p->index_data && open_pack_index(p))
+               return 0;
+
+       fanout = p->index_data;
+
+       /* index versions above 1 have two extra words before the table */
+       if (p->index_version > 1)
+               fanout += 2;
+
+       return ntohl(fanout[value]);
+}
+
 static struct packed_git *alloc_packed_git(int extra)
 {
        struct packed_git *p = xmalloc(st_add(sizeof(*p), extra));
@@ -321,6 +345,11 @@ void close_all_packs(struct raw_object_store *o)
                        BUG("want to close pack marked 'do-not-close'");
                else
                        close_pack(p);
+
+       if (o->multi_pack_index) {
+               close_midx(o->multi_pack_index);
+               o->multi_pack_index = NULL;
+       }
 }
 
 /*
@@ -451,8 +480,19 @@ static int open_packed_git_1(struct packed_git *p)
        ssize_t read_result;
        const unsigned hashsz = the_hash_algo->rawsz;
 
-       if (!p->index_data && open_pack_index(p))
-               return error("packfile %s index unavailable", p->pack_name);
+       if (!p->index_data) {
+               struct multi_pack_index *m;
+               const char *pack_name = strrchr(p->pack_name, '/');
+
+               for (m = the_repository->objects->multi_pack_index;
+                    m; m = m->next) {
+                       if (midx_contains_pack(m, pack_name))
+                               break;
+               }
+
+               if (!m && open_pack_index(p))
+                       return error("packfile %s index unavailable", p->pack_name);
+       }
 
        if (!pack_max_fds) {
                unsigned int max_fds = get_max_fd_limit();
@@ -503,6 +543,10 @@ static int open_packed_git_1(struct packed_git *p)
                        " supported (try upgrading GIT to a newer version)",
                        p->pack_name, ntohl(hdr.hdr_version));
 
+       /* Skip index checking if in multi-pack-index */
+       if (!p->index_data)
+               return 0;
+
        /* Verify the pack matches its index. */
        if (p->num_objects != ntohl(hdr.hdr_entries))
                return error("packfile %s claims to have %"PRIu32" objects"
@@ -517,7 +561,7 @@ static int open_packed_git_1(struct packed_git *p)
        if (read_result != hashsz)
                return error("packfile %s signature is unavailable", p->pack_name);
        idx_hash = ((unsigned char *)p->index_data) + p->index_size - hashsz * 2;
-       if (hashcmp(hash, idx_hash))
+       if (!hasheq(hash, idx_hash))
                return error("packfile %s does not match index", p->pack_name);
        return 0;
 }
@@ -738,13 +782,14 @@ static void report_pack_garbage(struct string_list *list)
        report_helper(list, seen_bits, first, list->nr);
 }
 
-static void prepare_packed_git_one(struct repository *r, char *objdir, int local)
+void for_each_file_in_pack_dir(const char *objdir,
+                              each_file_in_pack_dir_fn fn,
+                              void *data)
 {
        struct strbuf path = STRBUF_INIT;
        size_t dirnamelen;
        DIR *dir;
        struct dirent *de;
-       struct string_list garbage = STRING_LIST_INIT_DUP;
 
        strbuf_addstr(&path, objdir);
        strbuf_addstr(&path, "/pack");
@@ -759,53 +804,87 @@ static void prepare_packed_git_one(struct repository *r, char *objdir, int local
        strbuf_addch(&path, '/');
        dirnamelen = path.len;
        while ((de = readdir(dir)) != NULL) {
-               struct packed_git *p;
-               size_t base_len;
-
                if (is_dot_or_dotdot(de->d_name))
                        continue;
 
                strbuf_setlen(&path, dirnamelen);
                strbuf_addstr(&path, de->d_name);
 
-               base_len = path.len;
-               if (strip_suffix_mem(path.buf, &base_len, ".idx")) {
-                       /* Don't reopen a pack we already have. */
-                       for (p = r->objects->packed_git; p;
-                            p = p->next) {
-                               size_t len;
-                               if (strip_suffix(p->pack_name, ".pack", &len) &&
-                                   len == base_len &&
-                                   !memcmp(p->pack_name, path.buf, len))
-                                       break;
-                       }
-                       if (p == NULL &&
-                           /*
-                            * See if it really is a valid .idx file with
-                            * corresponding .pack file that we can map.
-                            */
-                           (p = add_packed_git(path.buf, path.len, local)) != NULL)
-                               install_packed_git(r, p);
-               }
-
-               if (!report_garbage)
-                       continue;
-
-               if (ends_with(de->d_name, ".idx") ||
-                   ends_with(de->d_name, ".pack") ||
-                   ends_with(de->d_name, ".bitmap") ||
-                   ends_with(de->d_name, ".keep") ||
-                   ends_with(de->d_name, ".promisor"))
-                       string_list_append(&garbage, path.buf);
-               else
-                       report_garbage(PACKDIR_FILE_GARBAGE, path.buf);
+               fn(path.buf, path.len, de->d_name, data);
        }
+
        closedir(dir);
-       report_pack_garbage(&garbage);
-       string_list_clear(&garbage, 0);
        strbuf_release(&path);
 }
 
+struct prepare_pack_data { /* state handed to prepare_pack() through its void *data argument */
+       struct repository *r; /* repository whose packed_git list is searched and extended */
+       struct string_list *garbage; /* collects pack-related file names for report_pack_garbage() */
+       int local; /* forwarded unchanged to add_packed_git() */
+       struct multi_pack_index *m; /* multi-pack-index matching the object directory, or NULL */
+};
+
+/*
+ * Callback for for_each_file_in_pack_dir(); "_data" points at a
+ * struct prepare_pack_data.
+ *
+ * A "*.idx" file that is not listed in the multi-pack-index (if there
+ * is one) and whose pack is not already on the repository's packed_git
+ * list is handed to add_packed_git() and, on success, installed.
+ *
+ * When report_garbage is set, every file other than "multi-pack-index"
+ * is either appended to the garbage list (known pack-related suffixes)
+ * or reported right away as PACKDIR_FILE_GARBAGE.
+ */
+static void prepare_pack(const char *full_name, size_t full_name_len,
+                        const char *file_name, void *_data)
+{
+       static const char *const known_suffixes[] = {
+               ".idx", ".pack", ".bitmap", ".keep", ".promisor"
+       };
+       struct prepare_pack_data *data = _data;
+       size_t base_len = full_name_len;
+       size_t i;
+
+       if (strip_suffix_mem(full_name, &base_len, ".idx") &&
+           (!data->m || !midx_contains_pack(data->m, file_name))) {
+               struct packed_git *p = data->r->objects->packed_git;
+
+               /* Don't reopen a pack we already have. */
+               while (p) {
+                       size_t len;
+                       if (strip_suffix(p->pack_name, ".pack", &len) &&
+                           len == base_len &&
+                           !memcmp(p->pack_name, full_name, len))
+                               break;
+                       p = p->next;
+               }
+
+               if (!p) {
+                       p = add_packed_git(full_name, full_name_len, data->local);
+                       if (p)
+                               install_packed_git(data->r, p);
+               }
+       }
+
+       if (!report_garbage || !strcmp(file_name, "multi-pack-index"))
+               return;
+
+       for (i = 0; i < sizeof(known_suffixes) / sizeof(known_suffixes[0]); i++) {
+               if (ends_with(file_name, known_suffixes[i])) {
+                       string_list_append(data->garbage, full_name);
+                       return;
+               }
+       }
+
+       report_garbage(PACKDIR_FILE_GARBAGE, full_name);
+}
+
+/*
+ * Scan the pack directory of "objdir" with prepare_pack() as the
+ * per-file callback, then pass the collected pack-related file names
+ * to report_pack_garbage(). "local" is forwarded to the callback.
+ */
+static void prepare_packed_git_one(struct repository *r, char *objdir, int local)
+{
+       struct string_list garbage = STRING_LIST_INIT_DUP;
+       struct prepare_pack_data data;
+       struct multi_pack_index *m;
+
+       /* look for the multi-pack-index for this object directory */
+       for (m = r->objects->multi_pack_index; m; m = m->next)
+               if (!strcmp(m->object_dir, objdir))
+                       break;
+
+       data.r = r;
+       data.garbage = &garbage;
+       data.local = local;
+       data.m = m;
+
+       for_each_file_in_pack_dir(objdir, prepare_pack, &data);
+
+       report_pack_garbage(&garbage);
+       string_list_clear(&garbage, 0);
+}
+
 static void prepare_packed_git(struct repository *r);
 /*
  * Give a fast, rough count of the number of objects in the repository. This
@@ -818,10 +897,13 @@ unsigned long approximate_object_count(void)
 {
        if (!the_repository->objects->approximate_object_count_valid) {
                unsigned long count;
+               struct multi_pack_index *m;
                struct packed_git *p;
 
                prepare_packed_git(the_repository);
                count = 0;
+               for (m = get_multi_pack_index(the_repository); m; m = m->next)
+                       count += m->num_objects;
                for (p = the_repository->objects->packed_git; p; p = p->next) {
                        if (open_pack_index(p))
                                continue;
@@ -889,21 +971,35 @@ static void prepare_packed_git_mru(struct repository *r)
 
 static void prepare_packed_git(struct repository *r)
 {
-       struct alternate_object_database *alt;
+       struct object_directory *odb;
 
        if (r->objects->packed_git_initialized)
                return;
-       prepare_packed_git_one(r, r->objects->objectdir, 1);
+
        prepare_alt_odb(r);
-       for (alt = r->objects->alt_odb_list; alt; alt = alt->next)
-               prepare_packed_git_one(r, alt->path, 0);
+       for (odb = r->objects->odb; odb; odb = odb->next) {
+               int local = (odb == r->objects->odb);
+               prepare_multi_pack_index_one(r, odb->path, local);
+               prepare_packed_git_one(r, odb->path, local);
+       }
        rearrange_packed_git(r);
+
+       r->objects->all_packs = NULL;
+
        prepare_packed_git_mru(r);
        r->objects->packed_git_initialized = 1;
 }
 
 void reprepare_packed_git(struct repository *r)
 {
+       struct object_directory *odb;
+
+       for (odb = r->objects->odb; odb; odb = odb->next) {
+               oid_array_clear(&odb->loose_objects_cache);
+               memset(&odb->loose_objects_subdir_seen, 0,
+                      sizeof(odb->loose_objects_subdir_seen));
+       }
+
        r->objects->approximate_object_count_valid = 0;
        r->objects->packed_git_initialized = 0;
        prepare_packed_git(r);
@@ -915,6 +1011,36 @@ struct packed_git *get_packed_git(struct repository *r)
        return r->objects->packed_git;
 }
 
+/*
+ * Return the head of the repository's multi-pack-index list, after
+ * making sure prepare_packed_git() has been run for "r".
+ */
+struct multi_pack_index *get_multi_pack_index(struct repository *r)
+{
+       struct multi_pack_index *head;
+
+       prepare_packed_git(r);
+       head = r->objects->multi_pack_index;
+
+       return head;
+}
+
+/*
+ * Return a list holding the packs on the repository's packed_git list
+ * plus every multi-pack-index pack that prepare_midx_pack() manages to
+ * load. The result is cached in r->objects->all_packs and reused while
+ * that pointer is non-NULL.
+ *
+ * Packs coming from a multi-pack-index are pushed onto the front of
+ * the list, which overwrites their "next" pointer.
+ */
+struct packed_git *get_all_packs(struct repository *r)
+{
+       struct packed_git *head;
+       struct multi_pack_index *m;
+
+       prepare_packed_git(r);
+
+       if (r->objects->all_packs)
+               return r->objects->all_packs;
+
+       head = r->objects->packed_git;
+       for (m = r->objects->multi_pack_index; m; m = m->next) {
+               uint32_t i;
+
+               for (i = 0; i < m->num_packs; i++) {
+                       /* a non-zero return means the pack was not loaded */
+                       if (prepare_midx_pack(m, i))
+                               continue;
+                       m->packs[i]->next = head;
+                       head = m->packs[i];
+               }
+       }
+
+       r->objects->all_packs = head;
+       return head;
+}
+
 struct list_head *get_packed_git_mru(struct repository *r)
 {
        prepare_packed_git(r);
@@ -1014,13 +1140,14 @@ int unpack_object_header(struct packed_git *p,
 void mark_bad_packed_object(struct packed_git *p, const unsigned char *sha1)
 {
        unsigned i;
+       const unsigned hashsz = the_hash_algo->rawsz;
        for (i = 0; i < p->num_bad_objects; i++)
-               if (!hashcmp(sha1, p->bad_object_sha1 + GIT_SHA1_RAWSZ * i))
+               if (hasheq(sha1, p->bad_object_sha1 + hashsz * i))
                        return;
        p->bad_object_sha1 = xrealloc(p->bad_object_sha1,
                                      st_mult(GIT_MAX_RAWSZ,
                                              st_add(p->num_bad_objects, 1)));
-       hashcpy(p->bad_object_sha1 + GIT_SHA1_RAWSZ * p->num_bad_objects, sha1);
+       hashcpy(p->bad_object_sha1 + hashsz * p->num_bad_objects, sha1);
        p->num_bad_objects++;
 }
 
@@ -1031,8 +1158,8 @@ const struct packed_git *has_packed_and_bad(const unsigned char *sha1)
 
        for (p = the_repository->objects->packed_git; p; p = p->next)
                for (i = 0; i < p->num_bad_objects; i++)
-                       if (!hashcmp(sha1,
-                                    p->bad_object_sha1 + the_hash_algo->rawsz * i))
+                       if (hasheq(sha1,
+                                  p->bad_object_sha1 + the_hash_algo->rawsz * i))
                                return p;
        return NULL;
 }
@@ -1830,8 +1957,8 @@ static int fill_pack_entry(const struct object_id *oid,
        if (p->num_bad_objects) {
                unsigned i;
                for (i = 0; i < p->num_bad_objects; i++)
-                       if (!hashcmp(oid->hash,
-                                    p->bad_object_sha1 + the_hash_algo->rawsz * i))
+                       if (hasheq(oid->hash,
+                                  p->bad_object_sha1 + the_hash_algo->rawsz * i))
                                return 0;
        }
 
@@ -1856,11 +1983,17 @@ static int fill_pack_entry(const struct object_id *oid,
 int find_pack_entry(struct repository *r, const struct object_id *oid, struct pack_entry *e)
 {
        struct list_head *pos;
+       struct multi_pack_index *m;
 
        prepare_packed_git(r);
-       if (!r->objects->packed_git)
+       if (!r->objects->packed_git && !r->objects->multi_pack_index)
                return 0;
 
+       for (m = r->objects->multi_pack_index; m; m = m->next) {
+               if (fill_midx_entry(oid, e, m))
+                       return 1;
+       }
+
        list_for_each(pos, &r->objects->packed_git_mru) {
                struct packed_git *p = list_entry(pos, struct packed_git, mru);
                if (fill_pack_entry(oid, e, p)) {
@@ -1923,7 +2056,7 @@ int for_each_packed_object(each_packed_object_fn cb, void *data,
        int pack_errors = 0;
 
        prepare_packed_git(the_repository);
-       for (p = the_repository->objects->packed_git; p; p = p->next) {
+       for (p = get_all_packs(the_repository); p; p = p->next) {
                if ((flags & FOR_EACH_OBJECT_LOCAL_ONLY) && !p->pack_local)
                        continue;
                if ((flags & FOR_EACH_OBJECT_PROMISOR_ONLY) &&