#include "lockfile.h"
#include "packfile.h"
#include "object-store.h"
+#include "packfile.h"
#include "midx.h"
#define MIDX_SIGNATURE 0x4d494458 /* "MIDX" */
packs->list[packs->nr] = add_packed_git(full_path,
full_path_len,
0);
+
if (!packs->list[packs->nr]) {
warning(_("failed to add packfile '%s'"),
full_path);
return;
}
+ if (open_pack_index(packs->list[packs->nr])) {
+ warning(_("failed to open pack-index '%s'"),
+ full_path);
+ close_pack(packs->list[packs->nr]);
+ FREE_AND_NULL(packs->list[packs->nr]);
+ return;
+ }
+
packs->names[packs->nr] = xstrdup(file_name);
packs->pack_name_concat_len += strlen(file_name) + 1;
packs->nr++;
free(pairs);
}
+struct pack_midx_entry { /* one object as found in one pack; unit that is sorted and de-duplicated */
+ struct object_id oid; /* object id; primary sort key in midx_oid_compare() */
+ uint32_t pack_int_id; /* pack identifier passed to fill_pack_entry(); last tie-breaker when sorting */
+ time_t pack_mtime; /* mtime copied from the containing pack; newer sorts first among equal oids */
+ uint64_t offset; /* value returned by nth_packed_object_offset() for this object */
+};
+
+/*
+ * Comparator for sorting arrays of struct pack_midx_entry with QSORT().
+ *
+ * Entries are ordered by object id first. Entries with equal object ids
+ * are ordered by pack mtime, newest first, so the first entry in each run
+ * of duplicates comes from the most recently modified pack. Any remaining
+ * tie is broken by ascending pack_int_id.
+ *
+ * Returns a negative value, zero, or a positive value when *_a sorts
+ * before, equal to, or after *_b.
+ */
+static int midx_oid_compare(const void *_a, const void *_b)
+{
+	const struct pack_midx_entry *a = _a;
+	const struct pack_midx_entry *b = _b;
+	int cmp = oidcmp(&a->oid, &b->oid);
+
+	if (cmp)
+		return cmp;
+
+	/* Descending by mtime: the newer pack wins among duplicates. */
+	if (a->pack_mtime > b->pack_mtime)
+		return -1;
+	else if (a->pack_mtime < b->pack_mtime)
+		return 1;
+
+	/*
+	 * Compare instead of subtracting: pack_int_id is an unsigned 32-bit
+	 * value, so the difference of two ids does not reliably fit in the
+	 * int return value and could yield the wrong sign.
+	 */
+	if (a->pack_int_id < b->pack_int_id)
+		return -1;
+	return a->pack_int_id > b->pack_int_id;
+}
+
+/*
+ * Populate *entry with the data for object number cur_object of pack p:
+ * the object id obtained from nth_packed_object_oid(), the offset
+ * reported by nth_packed_object_offset(), the pack's mtime, and the
+ * caller-supplied pack_int_id.
+ *
+ * Calls die() when nth_packed_object_oid() cannot provide the object id.
+ */
+static void fill_pack_entry(uint32_t pack_int_id,
+			    struct packed_git *p,
+			    uint32_t cur_object,
+			    struct pack_midx_entry *entry)
+{
+	/* cur_object is unsigned, so report it with an unsigned conversion. */
+	if (!nth_packed_object_oid(&entry->oid, p, cur_object))
+		die(_("failed to locate object %u in packfile"),
+		    (unsigned int)cur_object);
+
+	entry->pack_int_id = pack_int_id;
+	entry->pack_mtime = p->mtime;
+
+	entry->offset = nth_packed_object_offset(p, cur_object);
+}
+
+/*
+ * Collect the objects of all given packs into one newly allocated array
+ * in which every object id appears only once.
+ *
+ * Packs can contain many copies of the same object, so building one big
+ * list of every (object, pack) pair before de-duplicating could need a lot
+ * of memory. Instead the work is done in 256 rounds, one per value of the
+ * first byte of the object id: the IDX fanout tables give the slice of
+ * each pack that belongs to the round, the slices are copied into a
+ * scratch array, sorted, and only the first entry of each run of equal
+ * object ids is kept (the sort puts the copy from the most recently
+ * modified pack first).
+ *
+ * p holds nr_packs packs; perm[i] is the pack_int_id recorded for entries
+ * taken from p[i]. The number of entries in the returned array is stored
+ * in *nr_objects. The caller owns the returned array.
+ */
+static struct pack_midx_entry *get_sorted_entries(struct packed_git **p,
+						  uint32_t *perm,
+						  uint32_t nr_packs,
+						  uint32_t *nr_objects)
+{
+	uint32_t bucket, pack_idx, i;
+	uint32_t bucket_alloc, result_alloc, object_sum = 0;
+	struct pack_midx_entry *bucket_entries = NULL;
+	struct pack_midx_entry *result = NULL;
+
+	for (pack_idx = 0; pack_idx < nr_packs; pack_idx++)
+		object_sum += p[pack_idx]->num_objects;
+
+	/*
+	 * The fanout slices are expected to be of roughly equal size, give
+	 * or take some noise, so start both arrays a little above 1/256th
+	 * of the total; they grow on demand below.
+	 */
+	bucket_alloc = object_sum > 3200 ? object_sum / 200 : 16;
+	result_alloc = bucket_alloc;
+
+	ALLOC_ARRAY(bucket_entries, bucket_alloc);
+	ALLOC_ARRAY(result, result_alloc);
+	*nr_objects = 0;
+
+	for (bucket = 0; bucket < 256; bucket++) {
+		uint32_t bucket_nr = 0;
+
+		/* Gather this round's slice from every pack. */
+		for (pack_idx = 0; pack_idx < nr_packs; pack_idx++) {
+			struct packed_git *pack = p[pack_idx];
+			uint32_t first = bucket ? get_pack_fanout(pack, bucket - 1) : 0;
+			uint32_t limit = get_pack_fanout(pack, bucket);
+
+			for (i = first; i < limit; i++) {
+				ALLOC_GROW(bucket_entries, bucket_nr + 1, bucket_alloc);
+				fill_pack_entry(perm[pack_idx], pack, i,
+						&bucket_entries[bucket_nr]);
+				bucket_nr++;
+			}
+		}
+
+		QSORT(bucket_entries, bucket_nr, midx_oid_compare);
+
+		/*
+		 * Sorted by object id, then by mtime (newest first): keep
+		 * the first entry of each run of equal ids and drop the rest.
+		 */
+		for (i = 0; i < bucket_nr; i++) {
+			int is_duplicate = i > 0 &&
+				!oidcmp(&bucket_entries[i - 1].oid,
+					&bucket_entries[i].oid);
+
+			if (is_duplicate)
+				continue;
+
+			ALLOC_GROW(result, *nr_objects + 1, result_alloc);
+			memcpy(&result[*nr_objects],
+			       &bucket_entries[i],
+			       sizeof(struct pack_midx_entry));
+			(*nr_objects)++;
+		}
+	}
+
+	free(bucket_entries);
+	return result;
+}
+
static size_t write_midx_pack_names(struct hashfile *f,
char **pack_names,
uint32_t num_packs)
uint64_t written = 0;
uint32_t chunk_ids[MIDX_MAX_CHUNKS + 1];
uint64_t chunk_offsets[MIDX_MAX_CHUNKS + 1];
+ uint32_t nr_entries;
+ struct pack_midx_entry *entries = NULL;
midx_name = get_midx_filename(object_dir);
if (safe_create_leading_directories(midx_name)) {
ALLOC_ARRAY(pack_perm, packs.nr);
sort_packs_by_name(packs.names, packs.nr, pack_perm);
+ entries = get_sorted_entries(packs.list, pack_perm, packs.nr, &nr_entries);
+
hold_lock_file_for_update(&lk, midx_name, LOCK_DIE_ON_ERROR);
f = hashfd(lk.tempfile->fd, lk.tempfile->filename.buf);
FREE_AND_NULL(midx_name);
free(packs.list);
free(packs.names);
+ free(entries);
return 0;
}