From: Junio C Hamano Date: Tue, 29 May 2007 08:16:28 +0000 (-0700) Subject: Merge branch 'dh/repack' (early part) X-Git-Tag: v1.5.3-rc0~182 X-Git-Url: https://git.lorimer.id.au/gitweb.git/diff_plain/a77a33a51df9b7655d80299487ec6fbb10445496?hp=-c Merge branch 'dh/repack' (early part) * 'dh/repack' (early part): Ensure git-repack -a -d --max-pack-size=N deletes correct packs pack-objects: clarification & option checks for --max-pack-size git-repack --max-pack-size: add option parsing to enable feature git-repack --max-pack-size: split packs as asked by write_{object,one}() git-repack --max-pack-size: write_{object,one}() respect pack limit git-repack --max-pack-size: new file statics and code restructuring Alter sha1close() 3rd argument to request flush only --- a77a33a51df9b7655d80299487ec6fbb10445496 diff --combined builtin-pack-objects.c index eca130f055,a4370bbee1..e52332df99 --- a/builtin-pack-objects.c +++ b/builtin-pack-objects.c @@@ -1,6 -1,5 +1,6 @@@ #include "builtin.h" #include "cache.h" +#include "attr.h" #include "object.h" #include "blob.h" #include "commit.h" @@@ -16,7 -15,7 +16,7 @@@ #include "progress.h" static const char pack_usage[] = "\ - git-pack-objects [{ -q | --progress | --all-progress }] \n\ + git-pack-objects [{ -q | --progress | --all-progress }] [--max-pack-size=N] \n\ [--local] [--incremental] [--window=N] [--depth=N] \n\ [--no-reuse-delta] [--no-reuse-object] [--delta-base-offset] \n\ [--non-empty] [--revs [--unpacked | --all]*] [--reflog] \n\ @@@ -41,10 -40,9 +41,10 @@@ struct object_entry enum object_type in_pack_type; /* could be delta */ unsigned char in_pack_header_size; unsigned char preferred_base; /* we do not pack this, but is available - * to be used as the base objectto delta + * to be used as the base object to delta * objects against. */ + unsigned char no_try_delta; }; /* @@@ -54,7 -52,8 +54,8 @@@ * nice "minimum seek" order. */ static struct object_entry *objects; - static uint32_t nr_objects, nr_alloc, nr_result; + static struct object_entry **written_list; + static uint32_t nr_objects, nr_alloc, nr_result, nr_written; static int non_empty; static int no_reuse_delta, no_reuse_object; @@@ -63,9 -62,11 +64,11 @@@ static int incremental static int allow_ofs_delta; static const char *pack_tmp_name, *idx_tmp_name; static char tmpname[PATH_MAX]; + static const char *base_name; static unsigned char pack_file_sha1[20]; static int progress = 1; static int window = 10; + static uint32_t pack_size_limit; static int depth = 50; static int pack_to_stdout; static int num_preferred_base; @@@ -351,16 -352,31 +354,31 @@@ static void copy_pack_data(struct sha1f } static unsigned long write_object(struct sha1file *f, - struct object_entry *entry) + struct object_entry *entry, + off_t write_offset) { unsigned long size; enum object_type type; void *buf; unsigned char header[10]; + unsigned char dheader[10]; unsigned hdrlen; off_t datalen; enum object_type obj_type; int to_reuse = 0; + /* write limit if limited packsize and not first object */ + unsigned long limit = pack_size_limit && nr_written ? + pack_size_limit - write_offset : 0; + /* no if no delta */ + int usable_delta = !entry->delta ? 0 : + /* yes if unlimited packfile */ + !pack_size_limit ? 1 : + /* no if base written to previous pack */ + entry->delta->offset == (off_t)-1 ? 0 : + /* otherwise double-check written to this + * pack, like we do below + */ + entry->delta->offset ? 1 : 0; if (!pack_to_stdout) crc32_begin(f); @@@ -371,7 -387,9 +389,9 @@@ else if (!entry->in_pack) to_reuse = 0; /* can't reuse what we don't have */ else if (obj_type == OBJ_REF_DELTA || obj_type == OBJ_OFS_DELTA) - to_reuse = 1; /* check_object() decided it for us */ + /* check_object() decided it for us ... */ + to_reuse = usable_delta; + /* ... but pack split may override that */ else if (obj_type != entry->in_pack_type) to_reuse = 0; /* pack has delta which is unusable */ else if (entry->delta) @@@ -382,24 -400,48 +402,48 @@@ */ if (!to_reuse) { + z_stream stream; + unsigned long maxsize; + void *out; buf = read_sha1_file(entry->sha1, &type, &size); if (!buf) die("unable to read %s", sha1_to_hex(entry->sha1)); if (size != entry->size) die("object %s size inconsistency (%lu vs %lu)", sha1_to_hex(entry->sha1), size, entry->size); - if (entry->delta) { + if (usable_delta) { buf = delta_against(buf, size, entry); size = entry->delta_size; obj_type = (allow_ofs_delta && entry->delta->offset) ? OBJ_OFS_DELTA : OBJ_REF_DELTA; + } else { + /* + * recover real object type in case + * check_object() wanted to re-use a delta, + * but we couldn't since base was in previous split pack + */ + obj_type = type; } + /* compress the data to store and put compressed length in datalen */ + memset(&stream, 0, sizeof(stream)); + deflateInit(&stream, pack_compression_level); + maxsize = deflateBound(&stream, size); + out = xmalloc(maxsize); + /* Compress it */ + stream.next_in = buf; + stream.avail_in = size; + stream.next_out = out; + stream.avail_out = maxsize; + while (deflate(&stream, Z_FINISH) == Z_OK) + /* nothing */; + deflateEnd(&stream); + datalen = stream.total_out; + deflateEnd(&stream); /* * The object header is a byte of 'type' followed by zero or * more bytes of length. */ hdrlen = encode_header(obj_type, size, header); - sha1write(f, header, hdrlen); if (obj_type == OBJ_OFS_DELTA) { /* @@@ -408,21 -450,41 +452,41 @@@ * base from this object's position in the pack. */ off_t ofs = entry->offset - entry->delta->offset; - unsigned pos = sizeof(header) - 1; - header[pos] = ofs & 127; + unsigned pos = sizeof(dheader) - 1; + dheader[pos] = ofs & 127; while (ofs >>= 7) - header[--pos] = 128 | (--ofs & 127); - sha1write(f, header + pos, sizeof(header) - pos); - hdrlen += sizeof(header) - pos; + dheader[--pos] = 128 | (--ofs & 127); + if (limit && hdrlen + sizeof(dheader) - pos + datalen + 20 >= limit) { + free(out); + free(buf); + return 0; + } + sha1write(f, header, hdrlen); + sha1write(f, dheader + pos, sizeof(dheader) - pos); + hdrlen += sizeof(dheader) - pos; } else if (obj_type == OBJ_REF_DELTA) { /* * Deltas with a base reference contain * an additional 20 bytes for the base sha1. */ + if (limit && hdrlen + 20 + datalen + 20 >= limit) { + free(out); + free(buf); + return 0; + } + sha1write(f, header, hdrlen); sha1write(f, entry->delta->sha1, 20); hdrlen += 20; + } else { + if (limit && hdrlen + datalen + 20 >= limit) { + free(out); + free(buf); + return 0; + } + sha1write(f, header, hdrlen); } - datalen = sha1write_compressed(f, buf, size, pack_compression_level); + sha1write(f, out, datalen); + free(out); free(buf); } else { @@@ -437,20 -499,6 +501,6 @@@ reused_delta++; } hdrlen = encode_header(obj_type, entry->size, header); - sha1write(f, header, hdrlen); - if (obj_type == OBJ_OFS_DELTA) { - off_t ofs = entry->offset - entry->delta->offset; - unsigned pos = sizeof(header) - 1; - header[pos] = ofs & 127; - while (ofs >>= 7) - header[--pos] = 128 | (--ofs & 127); - sha1write(f, header + pos, sizeof(header) - pos); - hdrlen += sizeof(header) - pos; - } else if (obj_type == OBJ_REF_DELTA) { - sha1write(f, entry->delta->sha1, 20); - hdrlen += 20; - } - offset = entry->in_pack_offset; revidx = find_packed_object(p, offset); datalen = revidx[1].offset - offset; @@@ -459,6 -507,29 +509,29 @@@ die("bad packed object CRC for %s", sha1_to_hex(entry->sha1)); offset += entry->in_pack_header_size; datalen -= entry->in_pack_header_size; + if (obj_type == OBJ_OFS_DELTA) { + off_t ofs = entry->offset - entry->delta->offset; + unsigned pos = sizeof(dheader) - 1; + dheader[pos] = ofs & 127; + while (ofs >>= 7) + dheader[--pos] = 128 | (--ofs & 127); + if (limit && hdrlen + sizeof(dheader) - pos + datalen + 20 >= limit) + return 0; + sha1write(f, header, hdrlen); + sha1write(f, dheader + pos, sizeof(dheader) - pos); + hdrlen += sizeof(dheader) - pos; + } else if (obj_type == OBJ_REF_DELTA) { + if (limit && hdrlen + 20 + datalen + 20 >= limit) + return 0; + sha1write(f, header, hdrlen); + sha1write(f, entry->delta->sha1, 20); + hdrlen += 20; + } else { + if (limit && hdrlen + datalen + 20 >= limit) + return 0; + sha1write(f, header, hdrlen); + } + if (!pack_to_stdout && p->index_version == 1 && check_pack_inflate(p, &w_curs, offset, datalen, entry->size)) die("corrupt packed object for %s", sha1_to_hex(entry->sha1)); @@@ -466,7 -537,7 +539,7 @@@ unuse_pack(&w_curs); reused++; } - if (entry->delta) + if (usable_delta) written_delta++; written++; if (!pack_to_stdout) @@@ -485,11 -556,19 +558,19 @@@ static off_t write_one(struct sha1file return offset; /* if we are deltified, write out base object first. */ - if (e->delta) + if (e->delta) { offset = write_one(f, e->delta, offset); + if (!offset) + return 0; + } e->offset = offset; - size = write_object(f, e); + size = write_object(f, e, offset); + if (!size) { + e->offset = 0; + return 0; + } + written_list[nr_written++] = e; /* make sure off_t is sufficiently large not to wrap */ if (offset > offset + size) @@@ -503,49 -582,113 +584,113 @@@ static int open_object_dir_tmp(const ch return mkstemp(tmpname); } - static off_t write_pack_file(void) + /* forward declarations for write_pack_file */ + static void write_index_file(off_t last_obj_offset, unsigned char *sha1); + static int adjust_perm(const char *path, mode_t mode); + + static void write_pack_file(void) { - uint32_t i; + uint32_t i = 0, j; struct sha1file *f; - off_t offset, last_obj_offset = 0; + off_t offset, offset_one, last_obj_offset = 0; struct pack_header hdr; - int do_progress = progress; - - if (pack_to_stdout) { - f = sha1fd(1, ""); - do_progress >>= 1; - } else { - int fd = open_object_dir_tmp("tmp_pack_XXXXXX"); - if (fd < 0) - die("unable to create %s: %s\n", tmpname, strerror(errno)); - pack_tmp_name = xstrdup(tmpname); - f = sha1fd(fd, pack_tmp_name); - } + int do_progress = progress >> pack_to_stdout; + uint32_t nr_remaining = nr_result; if (do_progress) start_progress(&progress_state, "Writing %u objects...", "", nr_result); + written_list = xmalloc(nr_objects * sizeof(struct object_entry *)); - hdr.hdr_signature = htonl(PACK_SIGNATURE); - hdr.hdr_version = htonl(PACK_VERSION); - hdr.hdr_entries = htonl(nr_result); - sha1write(f, &hdr, sizeof(hdr)); - offset = sizeof(hdr); - if (!nr_result) - goto done; - for (i = 0; i < nr_objects; i++) { - last_obj_offset = offset; - offset = write_one(f, objects + i, offset); - if (do_progress) - display_progress(&progress_state, written); - } + do { + if (pack_to_stdout) { + f = sha1fd(1, ""); + } else { + int fd = open_object_dir_tmp("tmp_pack_XXXXXX"); + if (fd < 0) + die("unable to create %s: %s\n", tmpname, strerror(errno)); + pack_tmp_name = xstrdup(tmpname); + f = sha1fd(fd, pack_tmp_name); + } + + hdr.hdr_signature = htonl(PACK_SIGNATURE); + hdr.hdr_version = htonl(PACK_VERSION); + hdr.hdr_entries = htonl(nr_remaining); + sha1write(f, &hdr, sizeof(hdr)); + offset = sizeof(hdr); + nr_written = 0; + for (; i < nr_objects; i++) { + last_obj_offset = offset; + offset_one = write_one(f, objects + i, offset); + if (!offset_one) + break; + offset = offset_one; + if (do_progress) + display_progress(&progress_state, written); + } + + /* + * Did we write the wrong # entries in the header? + * If so, rewrite it like in fast-import + */ + if (pack_to_stdout || nr_written == nr_remaining) { + sha1close(f, pack_file_sha1, 1); + } else { + sha1close(f, pack_file_sha1, 0); + fixup_pack_header_footer(f->fd, pack_file_sha1, pack_tmp_name, nr_written); + close(f->fd); + } + + if (!pack_to_stdout) { + unsigned char object_list_sha1[20]; + mode_t mode = umask(0); + + umask(mode); + mode = 0444 & ~mode; + + write_index_file(last_obj_offset, object_list_sha1); + snprintf(tmpname, sizeof(tmpname), "%s-%s.pack", + base_name, sha1_to_hex(object_list_sha1)); + if (adjust_perm(pack_tmp_name, mode)) + die("unable to make temporary pack file readable: %s", + strerror(errno)); + if (rename(pack_tmp_name, tmpname)) + die("unable to rename temporary pack file: %s", + strerror(errno)); + snprintf(tmpname, sizeof(tmpname), "%s-%s.idx", + base_name, sha1_to_hex(object_list_sha1)); + if (adjust_perm(idx_tmp_name, mode)) + die("unable to make temporary index file readable: %s", + strerror(errno)); + if (rename(idx_tmp_name, tmpname)) + die("unable to rename temporary index file: %s", + strerror(errno)); + puts(sha1_to_hex(object_list_sha1)); + } + + /* mark written objects as written to previous pack */ + for (j = 0; j < nr_written; j++) { + written_list[j]->offset = (off_t)-1; + } + nr_remaining -= nr_written; + } while (nr_remaining && i < nr_objects); + + free(written_list); if (do_progress) stop_progress(&progress_state); - done: if (written != nr_result) die("wrote %u objects while expecting %u", written, nr_result); - sha1close(f, pack_file_sha1, 1); - - return last_obj_offset; + /* + * We have scanned through [0 ... i). Since we have written + * the correct number of objects, the remaining [i ... nr_objects) + * items must be either already written (due to out-of-order delta base) + * or a preferred base. Count those which are neither and complain if any. + */ + for (j = 0; i < nr_objects; i++) { + struct object_entry *e = objects + i; + j += !e->offset && !e->preferred_base; + } + if (j) + die("wrote %u objects as expected but %u unwritten", written, j); } static int sha1_sort(const void *_a, const void *_b) @@@ -572,18 -715,11 +717,11 @@@ static void write_index_file(off_t last idx_tmp_name = xstrdup(tmpname); f = sha1fd(fd, idx_tmp_name); - if (nr_result) { - uint32_t j = 0; - sorted_by_sha = - xcalloc(nr_result, sizeof(struct object_entry *)); - for (i = 0; i < nr_objects; i++) - if (!objects[i].preferred_base) - sorted_by_sha[j++] = objects + i; - if (j != nr_result) - die("listed %u objects while expecting %u", j, nr_result); - qsort(sorted_by_sha, nr_result, sizeof(*sorted_by_sha), sha1_sort); + if (nr_written) { + sorted_by_sha = written_list; + qsort(sorted_by_sha, nr_written, sizeof(*sorted_by_sha), sha1_sort); list = sorted_by_sha; - last = sorted_by_sha + nr_result; + last = sorted_by_sha + nr_written; } else sorted_by_sha = list = last = NULL; @@@ -621,7 -757,7 +759,7 @@@ /* Write the actual SHA1 entries. */ list = sorted_by_sha; - for (i = 0; i < nr_result; i++) { + for (i = 0; i < nr_written; i++) { struct object_entry *entry = *list++; if (index_version < 2) { uint32_t offset = htonl(entry->offset); @@@ -636,7 -772,7 +774,7 @@@ /* write the crc32 table */ list = sorted_by_sha; - for (i = 0; i < nr_objects; i++) { + for (i = 0; i < nr_written; i++) { struct object_entry *entry = *list++; uint32_t crc32_val = htonl(entry->crc32); sha1write(f, &crc32_val, 4); @@@ -644,7 -780,7 +782,7 @@@ /* write the 32-bit offset table */ list = sorted_by_sha; - for (i = 0; i < nr_objects; i++) { + for (i = 0; i < nr_written; i++) { struct object_entry *entry = *list++; uint32_t offset = (entry->offset <= index_off32_limit) ? entry->offset : (0x80000000 | nr_large_offset++); @@@ -669,7 -805,6 +807,6 @@@ sha1write(f, pack_file_sha1, 20); sha1close(f, NULL, 1); - free(sorted_by_sha); SHA1_Final(sha1, &ctx); } @@@ -725,9 -860,6 +862,9 @@@ static unsigned name_hash(const char *n unsigned char c; unsigned hash = 0; + if (!name) + return 0; + /* * This effectively just creates a sortable number from the * last sixteen non-whitespace characters. Last characters @@@ -741,36 -873,13 +878,36 @@@ return hash; } +static void setup_delta_attr_check(struct git_attr_check *check) +{ + static struct git_attr *attr_delta; + + if (!attr_delta) + attr_delta = git_attr("delta", 5); + + check[0].attr = attr_delta; +} + +static int no_try_delta(const char *path) +{ + struct git_attr_check check[1]; + + setup_delta_attr_check(check); + if (git_checkattr(path, ARRAY_SIZE(check), check)) + return 0; + if (ATTR_FALSE(check->value)) + return 1; + return 0; +} + static int add_object_entry(const unsigned char *sha1, enum object_type type, - unsigned hash, int exclude) + const char *name, int exclude) { struct object_entry *entry; struct packed_git *p, *found_pack = NULL; off_t found_offset = 0; int ix; + unsigned hash = name_hash(name); ix = nr_objects ? locate_object_entry_hash(sha1) : -1; if (ix >= 0) { @@@ -827,9 -936,6 +964,9 @@@ if (progress) display_progress(&progress_state, nr_objects); + if (name && no_try_delta(name)) + entry->no_try_delta = 1; + return 1; } @@@ -962,9 -1068,10 +1099,9 @@@ static void add_pbase_object(struct tre if (cmp < 0) return; if (name[cmplen] != '/') { - unsigned hash = name_hash(fullname); add_object_entry(entry.sha1, S_ISDIR(entry.mode) ? OBJ_TREE : OBJ_BLOB, - hash, 1); + fullname, 1); return; } if (S_ISDIR(entry.mode)) { @@@ -1024,11 -1131,10 +1161,11 @@@ static int check_pbase_path(unsigned ha return 0; } -static void add_preferred_base_object(const char *name, unsigned hash) +static void add_preferred_base_object(const char *name) { struct pbase_tree *it; int cmplen; + unsigned hash = name_hash(name); if (!num_preferred_base || check_pbase_path(hash)) return; @@@ -1036,7 -1142,7 +1173,7 @@@ cmplen = name_cmp_len(name); for (it = pbase_tree; it; it = it->next) { if (cmplen == 0) { - add_object_entry(it->pcache.sha1, OBJ_TREE, 0, 1); + add_object_entry(it->pcache.sha1, OBJ_TREE, NULL, 1); } else { struct tree_desc tree; @@@ -1378,10 -1484,6 +1515,10 @@@ static void find_deltas(struct object_e if (entry->size < 50) continue; + + if (entry->no_try_delta) + continue; + free_delta_index(n->index); n->index = NULL; free(n->data); @@@ -1481,6 -1583,7 +1618,6 @@@ static void read_object_list_from_stdin { char line[40 + 1 + PATH_MAX + 2]; unsigned char sha1[20]; - unsigned hash; for (;;) { if (!fgets(line, sizeof(line), stdin)) { @@@ -1503,20 -1606,22 +1640,20 @@@ if (get_sha1_hex(line, sha1)) die("expected sha1, got garbage:\n %s", line); - hash = name_hash(line+41); - add_preferred_base_object(line+41, hash); - add_object_entry(sha1, 0, hash, 0); + add_preferred_base_object(line+41); + add_object_entry(sha1, 0, line+41, 0); } } static void show_commit(struct commit *commit) { - add_object_entry(commit->object.sha1, OBJ_COMMIT, 0, 0); + add_object_entry(commit->object.sha1, OBJ_COMMIT, NULL, 0); } static void show_object(struct object_array_entry *p) { - unsigned hash = name_hash(p->name); - add_preferred_base_object(p->name, hash); - add_object_entry(p->item->sha1, p->item->type, hash, 0); + add_preferred_base_object(p->name); + add_object_entry(p->item->sha1, p->item->type, p->name, 0); } static void show_edge(struct commit *commit) @@@ -1569,8 -1674,6 +1706,6 @@@ int cmd_pack_objects(int argc, const ch int use_internal_rev_list = 0; int thin = 0; uint32_t i; - off_t last_obj_offset; - const char *base_name = NULL; const char **rp_av; int rp_ac_alloc = 64; int rp_ac; @@@ -1616,6 -1719,13 +1751,13 @@@ pack_compression_level = level; continue; } + if (!prefixcmp(arg, "--max-pack-size=")) { + char *end; + pack_size_limit = strtoul(arg+16, &end, 0) * 1024 * 1024; + if (!arg[16] || *end) + usage(pack_usage); + continue; + } if (!prefixcmp(arg, "--window=")) { char *end; window = strtoul(arg+9, &end, 0); @@@ -1714,6 -1824,9 +1856,9 @@@ if (pack_to_stdout != !base_name) usage(pack_usage); + if (pack_to_stdout && pack_size_limit) + die("--max-pack-size cannot be used to build a pack for transfer."); + if (!pack_to_stdout && thin) die("--thin cannot be used to build an indexable pack."); @@@ -1739,33 -1852,7 +1884,7 @@@ fprintf(stderr, "Result has %u objects.\n", nr_result); if (nr_result) prepare_pack(window, depth); - last_obj_offset = write_pack_file(); - if (!pack_to_stdout) { - unsigned char object_list_sha1[20]; - mode_t mode = umask(0); - - umask(mode); - mode = 0444 & ~mode; - - write_index_file(last_obj_offset, object_list_sha1); - snprintf(tmpname, sizeof(tmpname), "%s-%s.pack", - base_name, sha1_to_hex(object_list_sha1)); - if (adjust_perm(pack_tmp_name, mode)) - die("unable to make temporary pack file readable: %s", - strerror(errno)); - if (rename(pack_tmp_name, tmpname)) - die("unable to rename temporary pack file: %s", - strerror(errno)); - snprintf(tmpname, sizeof(tmpname), "%s-%s.idx", - base_name, sha1_to_hex(object_list_sha1)); - if (adjust_perm(idx_tmp_name, mode)) - die("unable to make temporary index file readable: %s", - strerror(errno)); - if (rename(idx_tmp_name, tmpname)) - die("unable to rename temporary index file: %s", - strerror(errno)); - puts(sha1_to_hex(object_list_sha1)); - } + write_pack_file(); if (progress) fprintf(stderr, "Total %u (delta %u), reused %u (delta %u)\n", written, written_delta, reused, reused_delta);