space and extra time spent on the initial repack. Defaults to
false.
+pack.writeBitmapHashCache::
+ When true, git will include a "hash cache" section in the bitmap
+ index (if one is written). This cache can be used to feed git's
+ delta heuristics, potentially leading to better deltas between
+ bitmapped and non-bitmapped objects (e.g., when serving a fetch
+ between an older, bitmapped pack and objects that have been
+ pushed since the last gc). The downside is that it consumes 4
+ bytes per object of disk space, and that JGit's bitmap
+ implementation does not understand it, causing JGit to complain if
+ Git and JGit are used on the same repository. Defaults to false.
+
pager.<cmd>::
If the value is boolean, turns on or off pagination of the
output of a particular Git subcommand when writing to a tty.
requirement for the bitmap index format, also present in JGit,
that greatly reduces the complexity of the implementation.
+ - BITMAP_OPT_HASH_CACHE (0x4)
+ If present, the end of the bitmap file contains
+ `N` 32-bit name-hash values, one per object in the
+ pack. The format and meaning of the name-hash is
+ described below.
+
4-byte entry count (network byte order)
The total count of entries (bitmapped commits) in this bitmap index.
The next word after `L_M` (if any) must again be a RLW, for the next
chunk. For efficient appending to the bitstream, the EWAH stores a
pointer to the last RLW in the stream.
+
+
+== Appendix B: Optional Bitmap Sections
+
+These sections may or may not be present in the `.bitmap` file; their
+presence is indicated by the header flags section described above.
+
+Name-hash cache
+---------------
+
+If the BITMAP_OPT_HASH_CACHE flag is set, the end of the bitmap contains
+a cache of 32-bit values in network byte order, one per object in the
+pack. The value at position `i` is the hash of the pathname at which the
+`i`th object (counting in index order) in the pack can be found. This can
+be fed into the delta heuristics to compare objects with similar pathnames.
+
+The hash algorithm used is:
+
+ hash = 0;
+ while ((c = *name++))
+ if (!isspace(c))
+ hash = (hash >> 2) + (c << 24);
+
+Note that this hashing scheme is tied to the BITMAP_OPT_HASH_CACHE flag.
+If implementations want to choose a different hashing scheme, they are
+free to do so, but MUST allocate a new header flag (because comparing
+hashes made under two different schemes would be pointless).
static int use_bitmap_index = 1;
static int write_bitmap_index;
+static uint16_t write_bitmap_options;
static unsigned long delta_cache_size = 0;
static unsigned long max_delta_cache_size = 256 * 1024 * 1024;
bitmap_writer_reuse_bitmaps(&to_pack);
bitmap_writer_select_commits(indexed_commits, indexed_commits_nr, -1);
bitmap_writer_build(&to_pack);
- bitmap_writer_finish(written_list, nr_written, tmpname);
+ bitmap_writer_finish(written_list, nr_written,
+ tmpname, write_bitmap_options);
write_bitmap_index = 0;
}
write_bitmap_index = git_config_bool(k, v);
return 0;
}
+ if (!strcmp(k, "pack.writebitmaphashcache")) {
+ if (git_config_bool(k, v))
+ write_bitmap_options |= BITMAP_OPT_HASH_CACHE;
+ else
+ write_bitmap_options &= ~BITMAP_OPT_HASH_CACHE;
+ }
if (!strcmp(k, "pack.usebitmaps")) {
use_bitmap_index = git_config_bool(k, v);
return 0;
}
}
+static void write_hash_cache(struct sha1file *f, /* stream the hash values are written to */
+ struct pack_idx_entry **index, /* entries to emit, walked in array order */
+ uint32_t index_nr) /* number of entries in index */
+{ /* Writes one 32-bit hash per index entry to f via sha1write(). */
+ uint32_t i;
+
+ for (i = 0; i < index_nr; ++i) {
+ struct object_entry *entry = (struct object_entry *)index[i]; /* NOTE(review): presumably each pack_idx_entry is embedded at the start of an object_entry -- confirm */
+ uint32_t hash_value = htonl(entry->hash); /* convert to network byte order before writing */
+ sha1write(f, &hash_value, sizeof(hash_value));
+ }
+}
+
void bitmap_writer_set_checksum(unsigned char *sha1)
{
hashcpy(writer.pack_checksum, sha1);
void bitmap_writer_finish(struct pack_idx_entry **index,
uint32_t index_nr,
- const char *filename)
+ const char *filename,
+ uint16_t options)
{
static char tmp_file[PATH_MAX];
static uint16_t default_version = 1;
memcpy(header.magic, BITMAP_IDX_SIGNATURE, sizeof(BITMAP_IDX_SIGNATURE));
header.version = htons(default_version);
- header.options = htons(flags);
+ header.options = htons(flags | options);
header.entry_count = htonl(writer.selected_nr);
memcpy(header.checksum, writer.pack_checksum, 20);
dump_bitmap(f, writer.tags);
write_selected_commits_v1(f, index, index_nr);
+ if (options & BITMAP_OPT_HASH_CACHE)
+ write_hash_cache(f, index, index_nr);
+
sha1close(f, NULL, CSUM_FSYNC);
if (adjust_shared_perm(tmp_file))
/* Number of bitmapped commits */
uint32_t entry_count;
+ /* Name-hash cache (or NULL if not present). */
+ uint32_t *hashes;
+
/*
* Extended index.
*
if ((flags & BITMAP_OPT_FULL_DAG) == 0)
return error("Unsupported options for bitmap index file "
"(Git requires BITMAP_OPT_FULL_DAG)");
+
+ if (flags & BITMAP_OPT_HASH_CACHE) {
+ unsigned char *end = index->map + index->map_size - 20;
+ index->hashes = ((uint32_t *)end) - index->pack->num_objects;
+ }
}
index->entry_count = ntohl(header->entry_count);
entry = &bitmap_git.reverse_index->revindex[pos + offset];
sha1 = nth_packed_object_sha1(bitmap_git.pack, entry->nr);
+ if (bitmap_git.hashes)
+ hash = ntohl(bitmap_git.hashes[entry->nr]);
+
show_reach(sha1, object_type, 0, hash, bitmap_git.pack, entry->offset);
}
#define NEEDS_BITMAP (1u<<22)
enum pack_bitmap_opts {
- BITMAP_OPT_FULL_DAG = 1
+ BITMAP_OPT_FULL_DAG = 1,
+ BITMAP_OPT_HASH_CACHE = 4,
};
enum pack_bitmap_flags {
void bitmap_writer_build(struct packing_data *to_pack);
void bitmap_writer_finish(struct pack_idx_entry **index,
uint32_t index_nr,
- const char *filename);
+ const char *filename,
+ uint16_t options);
#endif
# since we want to be able to compare bitmap-aware
# git versus non-bitmap git
test_expect_success 'setup bitmap config' '
- git config pack.writebitmaps true
+ git config pack.writebitmaps true &&
+ git config pack.writebitmaphashcache true
'
test_perf 'repack to disk' '
git checkout master &&
blob=$(echo tagged-blob | git hash-object -w --stdin) &&
git tag tagged-blob $blob &&
- git config pack.writebitmaps true
+ git config pack.writebitmaps true &&
+ git config pack.writebitmaphashcache true
'
test_expect_success 'full repack creates bitmaps' '