1#include "builtin.h" 2#include "cache.h" 3#include "attr.h" 4#include "object.h" 5#include "blob.h" 6#include "commit.h" 7#include "tag.h" 8#include "tree.h" 9#include "delta.h" 10#include "pack.h" 11#include "pack-revindex.h" 12#include "csum-file.h" 13#include "tree-walk.h" 14#include "diff.h" 15#include "revision.h" 16#include "list-objects.h" 17#include "pack-objects.h" 18#include "progress.h" 19#include "refs.h" 20#include "streaming.h" 21#include "thread-utils.h" 22#include "pack-bitmap.h" 23#include "reachable.h" 24#include "sha1-array.h" 25#include "argv-array.h" 26 27static const char *pack_usage[] = { 28 N_("git pack-objects --stdout [<options>...] [< <ref-list> | < <object-list>]"), 29 N_("git pack-objects [<options>...] <base-name> [< <ref-list> | < <object-list>]"), 30 NULL 31}; 32 33/* 34 * Objects we are going to pack are collected in the `to_pack` structure. 35 * It contains an array (dynamically expanded) of the object data, and a map 36 * that can resolve SHA1s to their position in the array. 37 */ 38static struct packing_data to_pack; 39 40static struct pack_idx_entry **written_list; 41static uint32_t nr_result, nr_written; 42 43static int non_empty; 44static int reuse_delta = 1, reuse_object = 1; 45static int keep_unreachable, unpack_unreachable, include_tag; 46static unsigned long unpack_unreachable_expiration; 47static int pack_loose_unreachable; 48static int local; 49static int have_non_local_packs; 50static int incremental; 51static int ignore_packed_keep; 52static int allow_ofs_delta; 53static struct pack_idx_option pack_idx_opts; 54static const char *base_name; 55static int progress = 1; 56static int window = 10; 57static unsigned long pack_size_limit; 58static int depth = 50; 59static int delta_search_threads; 60static int pack_to_stdout; 61static int num_preferred_base; 62static struct progress *progress_state; 63static int pack_compression_level = Z_DEFAULT_COMPRESSION; 64static int pack_compression_seen; 65 66static struct packed_git *reuse_packfile; 67static uint32_t reuse_packfile_objects; 68static off_t reuse_packfile_offset; 69 70static int use_bitmap_index_default = 1; 71static int use_bitmap_index = -1; 72static int write_bitmap_index; 73static uint16_t write_bitmap_options; 74 75static unsigned long delta_cache_size = 0; 76static unsigned long max_delta_cache_size = 256 * 1024 * 1024; 77static unsigned long cache_max_small_delta_size = 1000; 78 79static unsigned long window_memory_limit = 0; 80 81/* 82 * stats 83 */ 84static uint32_t written, written_delta; 85static uint32_t reused, reused_delta; 86 87/* 88 * Indexed commits 89 */ 90static struct commit **indexed_commits; 91static unsigned int indexed_commits_nr; 92static unsigned int indexed_commits_alloc; 93 94static void index_commit_for_bitmap(struct commit *commit) 95{ 96 if (indexed_commits_nr >= indexed_commits_alloc) { 97 indexed_commits_alloc = (indexed_commits_alloc + 32) * 2; 98 REALLOC_ARRAY(indexed_commits, indexed_commits_alloc); 99 } 100 101 indexed_commits[indexed_commits_nr++] = commit; 102} 103 104static void *get_delta(struct object_entry *entry) 105{ 106 unsigned long size, base_size, delta_size; 107 void *buf, *base_buf, *delta_buf; 108 enum object_type type; 109 110 buf = read_sha1_file(entry->idx.sha1, &type, &size); 111 if (!buf) 112 die("unable to read %s", sha1_to_hex(entry->idx.sha1)); 113 base_buf = read_sha1_file(entry->delta->idx.sha1, &type, &base_size); 114 if (!base_buf) 115 die("unable to read %s", sha1_to_hex(entry->delta->idx.sha1)); 116 delta_buf = 
diff_delta(base_buf, base_size, 117 buf, size, &delta_size, 0); 118 if (!delta_buf || delta_size != entry->delta_size) 119 die("delta size changed"); 120 free(buf); 121 free(base_buf); 122 return delta_buf; 123} 124 125static unsigned long do_compress(void **pptr, unsigned long size) 126{ 127 git_zstream stream; 128 void *in, *out; 129 unsigned long maxsize; 130 131 git_deflate_init(&stream, pack_compression_level); 132 maxsize = git_deflate_bound(&stream, size); 133 134 in = *pptr; 135 out = xmalloc(maxsize); 136 *pptr = out; 137 138 stream.next_in = in; 139 stream.avail_in = size; 140 stream.next_out = out; 141 stream.avail_out = maxsize; 142 while (git_deflate(&stream, Z_FINISH) == Z_OK) 143 ; /* nothing */ 144 git_deflate_end(&stream); 145 146 free(in); 147 return stream.total_out; 148} 149 150static unsigned long write_large_blob_data(struct git_istream *st, struct sha1file *f, 151 const unsigned char *sha1) 152{ 153 git_zstream stream; 154 unsigned char ibuf[1024 * 16]; 155 unsigned char obuf[1024 * 16]; 156 unsigned long olen = 0; 157 158 git_deflate_init(&stream, pack_compression_level); 159 160 for (;;) { 161 ssize_t readlen; 162 int zret = Z_OK; 163 readlen = read_istream(st, ibuf, sizeof(ibuf)); 164 if (readlen == -1) 165 die(_("unable to read %s"), sha1_to_hex(sha1)); 166 167 stream.next_in = ibuf; 168 stream.avail_in = readlen; 169 while ((stream.avail_in || readlen == 0) && 170 (zret == Z_OK || zret == Z_BUF_ERROR)) { 171 stream.next_out = obuf; 172 stream.avail_out = sizeof(obuf); 173 zret = git_deflate(&stream, readlen ? 0 : Z_FINISH); 174 sha1write(f, obuf, stream.next_out - obuf); 175 olen += stream.next_out - obuf; 176 } 177 if (stream.avail_in) 178 die(_("deflate error (%d)"), zret); 179 if (readlen == 0) { 180 if (zret != Z_STREAM_END) 181 die(_("deflate error (%d)"), zret); 182 break; 183 } 184 } 185 git_deflate_end(&stream); 186 return olen; 187} 188 189/* 190 * we are going to reuse the existing object data as is. make 191 * sure it is not corrupt. 192 */ 193static int check_pack_inflate(struct packed_git *p, 194 struct pack_window **w_curs, 195 off_t offset, 196 off_t len, 197 unsigned long expect) 198{ 199 git_zstream stream; 200 unsigned char fakebuf[4096], *in; 201 int st; 202 203 memset(&stream, 0, sizeof(stream)); 204 git_inflate_init(&stream); 205 do { 206 in = use_pack(p, w_curs, offset, &stream.avail_in); 207 stream.next_in = in; 208 stream.next_out = fakebuf; 209 stream.avail_out = sizeof(fakebuf); 210 st = git_inflate(&stream, Z_FINISH); 211 offset += stream.next_in - in; 212 } while (st == Z_OK || st == Z_BUF_ERROR); 213 git_inflate_end(&stream); 214 return (st == Z_STREAM_END && 215 stream.total_out == expect && 216 stream.total_in == len) ? 
0 : -1; 217} 218 219static void copy_pack_data(struct sha1file *f, 220 struct packed_git *p, 221 struct pack_window **w_curs, 222 off_t offset, 223 off_t len) 224{ 225 unsigned char *in; 226 unsigned long avail; 227 228 while (len) { 229 in = use_pack(p, w_curs, offset, &avail); 230 if (avail > len) 231 avail = (unsigned long)len; 232 sha1write(f, in, avail); 233 offset += avail; 234 len -= avail; 235 } 236} 237 238/* Return 0 if we will bust the pack-size limit */ 239static unsigned long write_no_reuse_object(struct sha1file *f, struct object_entry *entry, 240 unsigned long limit, int usable_delta) 241{ 242 unsigned long size, datalen; 243 unsigned char header[10], dheader[10]; 244 unsigned hdrlen; 245 enum object_type type; 246 void *buf; 247 struct git_istream *st = NULL; 248 249 if (!usable_delta) { 250 if (entry->type == OBJ_BLOB && 251 entry->size > big_file_threshold && 252 (st = open_istream(entry->idx.sha1, &type, &size, NULL)) != NULL) 253 buf = NULL; 254 else { 255 buf = read_sha1_file(entry->idx.sha1, &type, &size); 256 if (!buf) 257 die(_("unable to read %s"), sha1_to_hex(entry->idx.sha1)); 258 } 259 /* 260 * make sure no cached delta data remains from a 261 * previous attempt before a pack split occurred. 262 */ 263 free(entry->delta_data); 264 entry->delta_data = NULL; 265 entry->z_delta_size = 0; 266 } else if (entry->delta_data) { 267 size = entry->delta_size; 268 buf = entry->delta_data; 269 entry->delta_data = NULL; 270 type = (allow_ofs_delta && entry->delta->idx.offset) ? 271 OBJ_OFS_DELTA : OBJ_REF_DELTA; 272 } else { 273 buf = get_delta(entry); 274 size = entry->delta_size; 275 type = (allow_ofs_delta && entry->delta->idx.offset) ? 276 OBJ_OFS_DELTA : OBJ_REF_DELTA; 277 } 278 279 if (st) /* large blob case, just assume we don't compress well */ 280 datalen = size; 281 else if (entry->z_delta_size) 282 datalen = entry->z_delta_size; 283 else 284 datalen = do_compress(&buf, size); 285 286 /* 287 * The object header is a byte of 'type' followed by zero or 288 * more bytes of length. 289 */ 290 hdrlen = encode_in_pack_object_header(type, size, header); 291 292 if (type == OBJ_OFS_DELTA) { 293 /* 294 * Deltas with relative base contain an additional 295 * encoding of the relative offset for the delta 296 * base from this object's position in the pack. 297 */ 298 off_t ofs = entry->idx.offset - entry->delta->idx.offset; 299 unsigned pos = sizeof(dheader) - 1; 300 dheader[pos] = ofs & 127; 301 while (ofs >>= 7) 302 dheader[--pos] = 128 | (--ofs & 127); 303 if (limit && hdrlen + sizeof(dheader) - pos + datalen + 20 >= limit) { 304 if (st) 305 close_istream(st); 306 free(buf); 307 return 0; 308 } 309 sha1write(f, header, hdrlen); 310 sha1write(f, dheader + pos, sizeof(dheader) - pos); 311 hdrlen += sizeof(dheader) - pos; 312 } else if (type == OBJ_REF_DELTA) { 313 /* 314 * Deltas with a base reference contain 315 * an additional 20 bytes for the base sha1. 
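		 * The size check that follows therefore budgets for the object
		 * header, those 20 bytes of base sha1, the compressed delta
		 * data, and the 20-byte trailing pack checksum that still has
		 * to fit under the pack-size limit.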
316 */ 317 if (limit && hdrlen + 20 + datalen + 20 >= limit) { 318 if (st) 319 close_istream(st); 320 free(buf); 321 return 0; 322 } 323 sha1write(f, header, hdrlen); 324 sha1write(f, entry->delta->idx.sha1, 20); 325 hdrlen += 20; 326 } else { 327 if (limit && hdrlen + datalen + 20 >= limit) { 328 if (st) 329 close_istream(st); 330 free(buf); 331 return 0; 332 } 333 sha1write(f, header, hdrlen); 334 } 335 if (st) { 336 datalen = write_large_blob_data(st, f, entry->idx.sha1); 337 close_istream(st); 338 } else { 339 sha1write(f, buf, datalen); 340 free(buf); 341 } 342 343 return hdrlen + datalen; 344} 345 346/* Return 0 if we will bust the pack-size limit */ 347static off_t write_reuse_object(struct sha1file *f, struct object_entry *entry, 348 unsigned long limit, int usable_delta) 349{ 350 struct packed_git *p = entry->in_pack; 351 struct pack_window *w_curs = NULL; 352 struct revindex_entry *revidx; 353 off_t offset; 354 enum object_type type = entry->type; 355 off_t datalen; 356 unsigned char header[10], dheader[10]; 357 unsigned hdrlen; 358 359 if (entry->delta) 360 type = (allow_ofs_delta && entry->delta->idx.offset) ? 361 OBJ_OFS_DELTA : OBJ_REF_DELTA; 362 hdrlen = encode_in_pack_object_header(type, entry->size, header); 363 364 offset = entry->in_pack_offset; 365 revidx = find_pack_revindex(p, offset); 366 datalen = revidx[1].offset - offset; 367 if (!pack_to_stdout && p->index_version > 1 && 368 check_pack_crc(p, &w_curs, offset, datalen, revidx->nr)) { 369 error("bad packed object CRC for %s", sha1_to_hex(entry->idx.sha1)); 370 unuse_pack(&w_curs); 371 return write_no_reuse_object(f, entry, limit, usable_delta); 372 } 373 374 offset += entry->in_pack_header_size; 375 datalen -= entry->in_pack_header_size; 376 377 if (!pack_to_stdout && p->index_version == 1 && 378 check_pack_inflate(p, &w_curs, offset, datalen, entry->size)) { 379 error("corrupt packed object for %s", sha1_to_hex(entry->idx.sha1)); 380 unuse_pack(&w_curs); 381 return write_no_reuse_object(f, entry, limit, usable_delta); 382 } 383 384 if (type == OBJ_OFS_DELTA) { 385 off_t ofs = entry->idx.offset - entry->delta->idx.offset; 386 unsigned pos = sizeof(dheader) - 1; 387 dheader[pos] = ofs & 127; 388 while (ofs >>= 7) 389 dheader[--pos] = 128 | (--ofs & 127); 390 if (limit && hdrlen + sizeof(dheader) - pos + datalen + 20 >= limit) { 391 unuse_pack(&w_curs); 392 return 0; 393 } 394 sha1write(f, header, hdrlen); 395 sha1write(f, dheader + pos, sizeof(dheader) - pos); 396 hdrlen += sizeof(dheader) - pos; 397 reused_delta++; 398 } else if (type == OBJ_REF_DELTA) { 399 if (limit && hdrlen + 20 + datalen + 20 >= limit) { 400 unuse_pack(&w_curs); 401 return 0; 402 } 403 sha1write(f, header, hdrlen); 404 sha1write(f, entry->delta->idx.sha1, 20); 405 hdrlen += 20; 406 reused_delta++; 407 } else { 408 if (limit && hdrlen + datalen + 20 >= limit) { 409 unuse_pack(&w_curs); 410 return 0; 411 } 412 sha1write(f, header, hdrlen); 413 } 414 copy_pack_data(f, p, &w_curs, offset, datalen); 415 unuse_pack(&w_curs); 416 reused++; 417 return hdrlen + datalen; 418} 419 420/* Return 0 if we will bust the pack-size limit */ 421static off_t write_object(struct sha1file *f, 422 struct object_entry *entry, 423 off_t write_offset) 424{ 425 unsigned long limit; 426 off_t len; 427 int usable_delta, to_reuse; 428 429 if (!pack_to_stdout) 430 crc32_begin(f); 431 432 /* apply size limit if limited packsize and not first object */ 433 if (!pack_size_limit || !nr_written) 434 limit = 0; 435 else if (pack_size_limit <= write_offset) 436 /* 437 * the 
earlier object did not fit the limit; avoid 438 * mistaking this with unlimited (i.e. limit = 0). 439 */ 440 limit = 1; 441 else 442 limit = pack_size_limit - write_offset; 443 444 if (!entry->delta) 445 usable_delta = 0; /* no delta */ 446 else if (!pack_size_limit) 447 usable_delta = 1; /* unlimited packfile */ 448 else if (entry->delta->idx.offset == (off_t)-1) 449 usable_delta = 0; /* base was written to another pack */ 450 else if (entry->delta->idx.offset) 451 usable_delta = 1; /* base already exists in this pack */ 452 else 453 usable_delta = 0; /* base could end up in another pack */ 454 455 if (!reuse_object) 456 to_reuse = 0; /* explicit */ 457 else if (!entry->in_pack) 458 to_reuse = 0; /* can't reuse what we don't have */ 459 else if (entry->type == OBJ_REF_DELTA || entry->type == OBJ_OFS_DELTA) 460 /* check_object() decided it for us ... */ 461 to_reuse = usable_delta; 462 /* ... but pack split may override that */ 463 else if (entry->type != entry->in_pack_type) 464 to_reuse = 0; /* pack has delta which is unusable */ 465 else if (entry->delta) 466 to_reuse = 0; /* we want to pack afresh */ 467 else 468 to_reuse = 1; /* we have it in-pack undeltified, 469 * and we do not need to deltify it. 470 */ 471 472 if (!to_reuse) 473 len = write_no_reuse_object(f, entry, limit, usable_delta); 474 else 475 len = write_reuse_object(f, entry, limit, usable_delta); 476 if (!len) 477 return 0; 478 479 if (usable_delta) 480 written_delta++; 481 written++; 482 if (!pack_to_stdout) 483 entry->idx.crc32 = crc32_end(f); 484 return len; 485} 486 487enum write_one_status { 488 WRITE_ONE_SKIP = -1, /* already written */ 489 WRITE_ONE_BREAK = 0, /* writing this will bust the limit; not written */ 490 WRITE_ONE_WRITTEN = 1, /* normal */ 491 WRITE_ONE_RECURSIVE = 2 /* already scheduled to be written */ 492}; 493 494static enum write_one_status write_one(struct sha1file *f, 495 struct object_entry *e, 496 off_t *offset) 497{ 498 off_t size; 499 int recursing; 500 501 /* 502 * we set offset to 1 (which is an impossible value) to mark 503 * the fact that this object is involved in "write its base 504 * first before writing a deltified object" recursion. 505 */ 506 recursing = (e->idx.offset == 1); 507 if (recursing) { 508 warning("recursive delta detected for object %s", 509 sha1_to_hex(e->idx.sha1)); 510 return WRITE_ONE_RECURSIVE; 511 } else if (e->idx.offset || e->preferred_base) { 512 /* offset is non zero if object is written already. */ 513 return WRITE_ONE_SKIP; 514 } 515 516 /* if we are deltified, write out base object first. 
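	 * (An OFS_DELTA can only refer backwards in the pack, and
	 * write_object() only treats a delta as usable once its base has a
	 * real idx.offset, so the base has to precede the objects that are
	 * deltified against it.)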
*/ 517 if (e->delta) { 518 e->idx.offset = 1; /* now recurse */ 519 switch (write_one(f, e->delta, offset)) { 520 case WRITE_ONE_RECURSIVE: 521 /* we cannot depend on this one */ 522 e->delta = NULL; 523 break; 524 default: 525 break; 526 case WRITE_ONE_BREAK: 527 e->idx.offset = recursing; 528 return WRITE_ONE_BREAK; 529 } 530 } 531 532 e->idx.offset = *offset; 533 size = write_object(f, e, *offset); 534 if (!size) { 535 e->idx.offset = recursing; 536 return WRITE_ONE_BREAK; 537 } 538 written_list[nr_written++] = &e->idx; 539 540 /* make sure off_t is sufficiently large not to wrap */ 541 if (signed_add_overflows(*offset, size)) 542 die("pack too large for current definition of off_t"); 543 *offset += size; 544 return WRITE_ONE_WRITTEN; 545} 546 547static int mark_tagged(const char *path, const struct object_id *oid, int flag, 548 void *cb_data) 549{ 550 unsigned char peeled[20]; 551 struct object_entry *entry = packlist_find(&to_pack, oid->hash, NULL); 552 553 if (entry) 554 entry->tagged = 1; 555 if (!peel_ref(path, peeled)) { 556 entry = packlist_find(&to_pack, peeled, NULL); 557 if (entry) 558 entry->tagged = 1; 559 } 560 return 0; 561} 562 563static inline void add_to_write_order(struct object_entry **wo, 564 unsigned int *endp, 565 struct object_entry *e) 566{ 567 if (e->filled) 568 return; 569 wo[(*endp)++] = e; 570 e->filled = 1; 571} 572 573static void add_descendants_to_write_order(struct object_entry **wo, 574 unsigned int *endp, 575 struct object_entry *e) 576{ 577 int add_to_order = 1; 578 while (e) { 579 if (add_to_order) { 580 struct object_entry *s; 581 /* add this node... */ 582 add_to_write_order(wo, endp, e); 583 /* all its siblings... */ 584 for (s = e->delta_sibling; s; s = s->delta_sibling) { 585 add_to_write_order(wo, endp, s); 586 } 587 } 588 /* drop down a level to add left subtree nodes if possible */ 589 if (e->delta_child) { 590 add_to_order = 1; 591 e = e->delta_child; 592 } else { 593 add_to_order = 0; 594 /* our sibling might have some children, it is next */ 595 if (e->delta_sibling) { 596 e = e->delta_sibling; 597 continue; 598 } 599 /* go back to our parent node */ 600 e = e->delta; 601 while (e && !e->delta_sibling) { 602 /* we're on the right side of a subtree, keep 603 * going up until we can go right again */ 604 e = e->delta; 605 } 606 if (!e) { 607 /* done- we hit our original root node */ 608 return; 609 } 610 /* pass it off to sibling at this level */ 611 e = e->delta_sibling; 612 } 613 }; 614} 615 616static void add_family_to_write_order(struct object_entry **wo, 617 unsigned int *endp, 618 struct object_entry *e) 619{ 620 struct object_entry *root; 621 622 for (root = e; root->delta; root = root->delta) 623 ; /* nothing */ 624 add_descendants_to_write_order(wo, endp, root); 625} 626 627static struct object_entry **compute_write_order(void) 628{ 629 unsigned int i, wo_end, last_untagged; 630 631 struct object_entry **wo; 632 struct object_entry *objects = to_pack.objects; 633 634 for (i = 0; i < to_pack.nr_objects; i++) { 635 objects[i].tagged = 0; 636 objects[i].filled = 0; 637 objects[i].delta_child = NULL; 638 objects[i].delta_sibling = NULL; 639 } 640 641 /* 642 * Fully connect delta_child/delta_sibling network. 643 * Make sure delta_sibling is sorted in the original 644 * recency order. 
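	 * Walking the array from the end and prepending each entry to its
	 * base's delta_child list leaves every sibling chain in the original
	 * array order.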
645 */ 646 for (i = to_pack.nr_objects; i > 0;) { 647 struct object_entry *e = &objects[--i]; 648 if (!e->delta) 649 continue; 650 /* Mark me as the first child */ 651 e->delta_sibling = e->delta->delta_child; 652 e->delta->delta_child = e; 653 } 654 655 /* 656 * Mark objects that are at the tip of tags. 657 */ 658 for_each_tag_ref(mark_tagged, NULL); 659 660 /* 661 * Give the objects in the original recency order until 662 * we see a tagged tip. 663 */ 664 ALLOC_ARRAY(wo, to_pack.nr_objects); 665 for (i = wo_end = 0; i < to_pack.nr_objects; i++) { 666 if (objects[i].tagged) 667 break; 668 add_to_write_order(wo, &wo_end, &objects[i]); 669 } 670 last_untagged = i; 671 672 /* 673 * Then fill all the tagged tips. 674 */ 675 for (; i < to_pack.nr_objects; i++) { 676 if (objects[i].tagged) 677 add_to_write_order(wo, &wo_end, &objects[i]); 678 } 679 680 /* 681 * And then all remaining commits and tags. 682 */ 683 for (i = last_untagged; i < to_pack.nr_objects; i++) { 684 if (objects[i].type != OBJ_COMMIT && 685 objects[i].type != OBJ_TAG) 686 continue; 687 add_to_write_order(wo, &wo_end, &objects[i]); 688 } 689 690 /* 691 * And then all the trees. 692 */ 693 for (i = last_untagged; i < to_pack.nr_objects; i++) { 694 if (objects[i].type != OBJ_TREE) 695 continue; 696 add_to_write_order(wo, &wo_end, &objects[i]); 697 } 698 699 /* 700 * Finally all the rest in really tight order 701 */ 702 for (i = last_untagged; i < to_pack.nr_objects; i++) { 703 if (!objects[i].filled) 704 add_family_to_write_order(wo, &wo_end, &objects[i]); 705 } 706 707 if (wo_end != to_pack.nr_objects) 708 die("ordered %u objects, expected %"PRIu32, wo_end, to_pack.nr_objects); 709 710 return wo; 711} 712 713static off_t write_reused_pack(struct sha1file *f) 714{ 715 unsigned char buffer[8192]; 716 off_t to_write, total; 717 int fd; 718 719 if (!is_pack_valid(reuse_packfile)) 720 die("packfile is invalid: %s", reuse_packfile->pack_name); 721 722 fd = git_open_noatime(reuse_packfile->pack_name); 723 if (fd < 0) 724 die_errno("unable to open packfile for reuse: %s", 725 reuse_packfile->pack_name); 726 727 if (lseek(fd, sizeof(struct pack_header), SEEK_SET) == -1) 728 die_errno("unable to seek in reused packfile"); 729 730 if (reuse_packfile_offset < 0) 731 reuse_packfile_offset = reuse_packfile->pack_size - 20; 732 733 total = to_write = reuse_packfile_offset - sizeof(struct pack_header); 734 735 while (to_write) { 736 int read_pack = xread(fd, buffer, sizeof(buffer)); 737 738 if (read_pack <= 0) 739 die_errno("unable to read from reused packfile"); 740 741 if (read_pack > to_write) 742 read_pack = to_write; 743 744 sha1write(f, buffer, read_pack); 745 to_write -= read_pack; 746 747 /* 748 * We don't know the actual number of objects written, 749 * only how many bytes written, how many bytes total, and 750 * how many objects total. So we can fake it by pretending all 751 * objects we are writing are the same size. This gives us a 752 * smooth progress meter, and at the end it matches the true 753 * answer. 
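		 * (Hypothetical numbers for illustration: with total == 1000,
		 * to_write == 400 and reuse_packfile_objects == 50, we would
		 * report 50 * 600 / 1000 = 30 objects so far.)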
754 */ 755 written = reuse_packfile_objects * 756 (((double)(total - to_write)) / total); 757 display_progress(progress_state, written); 758 } 759 760 close(fd); 761 written = reuse_packfile_objects; 762 display_progress(progress_state, written); 763 return reuse_packfile_offset - sizeof(struct pack_header); 764} 765 766static const char no_split_warning[] = N_( 767"disabling bitmap writing, packs are split due to pack.packSizeLimit" 768); 769 770static void write_pack_file(void) 771{ 772 uint32_t i = 0, j; 773 struct sha1file *f; 774 off_t offset; 775 uint32_t nr_remaining = nr_result; 776 time_t last_mtime = 0; 777 struct object_entry **write_order; 778 779 if (progress > pack_to_stdout) 780 progress_state = start_progress(_("Writing objects"), nr_result); 781 ALLOC_ARRAY(written_list, to_pack.nr_objects); 782 write_order = compute_write_order(); 783 784 do { 785 unsigned char sha1[20]; 786 char *pack_tmp_name = NULL; 787 788 if (pack_to_stdout) 789 f = sha1fd_throughput(1, "<stdout>", progress_state); 790 else 791 f = create_tmp_packfile(&pack_tmp_name); 792 793 offset = write_pack_header(f, nr_remaining); 794 795 if (reuse_packfile) { 796 off_t packfile_size; 797 assert(pack_to_stdout); 798 799 packfile_size = write_reused_pack(f); 800 offset += packfile_size; 801 } 802 803 nr_written = 0; 804 for (; i < to_pack.nr_objects; i++) { 805 struct object_entry *e = write_order[i]; 806 if (write_one(f, e, &offset) == WRITE_ONE_BREAK) 807 break; 808 display_progress(progress_state, written); 809 } 810 811 /* 812 * Did we write the wrong # entries in the header? 813 * If so, rewrite it like in fast-import 814 */ 815 if (pack_to_stdout) { 816 sha1close(f, sha1, CSUM_CLOSE); 817 } else if (nr_written == nr_remaining) { 818 sha1close(f, sha1, CSUM_FSYNC); 819 } else { 820 int fd = sha1close(f, sha1, 0); 821 fixup_pack_header_footer(fd, sha1, pack_tmp_name, 822 nr_written, sha1, offset); 823 close(fd); 824 if (write_bitmap_index) { 825 warning(_(no_split_warning)); 826 write_bitmap_index = 0; 827 } 828 } 829 830 if (!pack_to_stdout) { 831 struct stat st; 832 struct strbuf tmpname = STRBUF_INIT; 833 834 /* 835 * Packs are runtime accessed in their mtime 836 * order since newer packs are more likely to contain 837 * younger objects. So if we are creating multiple 838 * packs then we should modify the mtime of later ones 839 * to preserve this property. 
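			 * The first pack written tends to hold the youngest
			 * objects, so each later pack is given an mtime one
			 * second older than the one before it below.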
			 */
			if (stat(pack_tmp_name, &st) < 0) {
				warning_errno("failed to stat %s", pack_tmp_name);
			} else if (!last_mtime) {
				last_mtime = st.st_mtime;
			} else {
				struct utimbuf utb;
				utb.actime = st.st_atime;
				utb.modtime = --last_mtime;
				if (utime(pack_tmp_name, &utb) < 0)
					warning_errno("failed utime() on %s", pack_tmp_name);
			}

			strbuf_addf(&tmpname, "%s-", base_name);

			if (write_bitmap_index) {
				bitmap_writer_set_checksum(sha1);
				bitmap_writer_build_type_index(written_list, nr_written);
			}

			finish_tmp_packfile(&tmpname, pack_tmp_name,
					    written_list, nr_written,
					    &pack_idx_opts, sha1);

			if (write_bitmap_index) {
				strbuf_addf(&tmpname, "%s.bitmap", sha1_to_hex(sha1));

				stop_progress(&progress_state);

				bitmap_writer_show_progress(progress);
				bitmap_writer_reuse_bitmaps(&to_pack);
				bitmap_writer_select_commits(indexed_commits, indexed_commits_nr, -1);
				bitmap_writer_build(&to_pack);
				bitmap_writer_finish(written_list, nr_written,
						     tmpname.buf, write_bitmap_options);
				write_bitmap_index = 0;
			}

			strbuf_release(&tmpname);
			free(pack_tmp_name);
			puts(sha1_to_hex(sha1));
		}

		/* mark written objects as written to previous pack */
		for (j = 0; j < nr_written; j++) {
			written_list[j]->offset = (off_t)-1;
		}
		nr_remaining -= nr_written;
	} while (nr_remaining && i < to_pack.nr_objects);

	free(written_list);
	free(write_order);
	stop_progress(&progress_state);
	if (written != nr_result)
		die("wrote %"PRIu32" objects while expecting %"PRIu32,
		    written, nr_result);
}

static void setup_delta_attr_check(struct git_attr_check *check)
{
	static struct git_attr *attr_delta;

	if (!attr_delta)
		attr_delta = git_attr("delta");

	check[0].attr = attr_delta;
}

static int no_try_delta(const char *path)
{
	struct git_attr_check check[1];

	setup_delta_attr_check(check);
	if (git_check_attr(path, ARRAY_SIZE(check), check))
		return 0;
	if (ATTR_FALSE(check->value))
		return 1;
	return 0;
}

/*
 * When adding an object, check whether we have already added it
 * to our packing list. If so, we can skip. However, if we are
 * being asked to exclude it, but the previous mention was to include
 * it, make sure to adjust its flags and tweak our numbers accordingly.
 *
 * As an optimization, we pass out the index position where we would have
 * found the item, since that saves us from having to look it up again a
 * few lines later when we want to add the new entry.
 */
static int have_duplicate_entry(const unsigned char *sha1,
				int exclude,
				uint32_t *index_pos)
{
	struct object_entry *entry;

	entry = packlist_find(&to_pack, sha1, index_pos);
	if (!entry)
		return 0;

	if (exclude) {
		if (!entry->preferred_base)
			nr_result--;
		entry->preferred_base = 1;
	}

	return 1;
}

static int want_found_object(int exclude, struct packed_git *p)
{
	if (exclude)
		return 1;
	if (incremental)
		return 0;

	/*
	 * When asked to do --local (do not include an object that appears in a
	 * pack we borrow from elsewhere) or --honor-pack-keep (do not include
	 * an object that appears in a pack marked with .keep), finding a pack
	 * that matches the criteria is sufficient for us to decide to omit it.
	 * However, even if this pack does not satisfy the criteria, we need to
	 * make sure no copy of this object appears in _any_ pack that makes us
	 * omit the object, so we need to check all the packs.
	 *
	 * We can however first check whether these options can possibly matter;
	 * if they do not matter we know we want the object in the generated pack.
	 * Otherwise, we signal "-1" at the end to tell the caller that we do
	 * not know either way, and it needs to check more packs.
	 */
	if (!ignore_packed_keep &&
	    (!local || !have_non_local_packs))
		return 1;

	if (local && !p->pack_local)
		return 0;
	if (ignore_packed_keep && p->pack_local && p->pack_keep)
		return 0;

	/* we don't know yet; keep looking for more packs */
	return -1;
}

/*
 * Check whether we want the object in the pack (e.g., we do not want
 * objects found in non-local stores if the "--local" option was used).
 *
 * If the caller already knows the pack the object lives in, that is passed
 * in *found_pack and *found_offset; otherwise this function finds if there
 * is any pack that has the object and returns the pack and its offset in
 * these variables.
 */
static int want_object_in_pack(const unsigned char *sha1,
			       int exclude,
			       struct packed_git **found_pack,
			       off_t *found_offset)
{
	struct packed_git *p;
	int want;

	if (!exclude && local && has_loose_object_nonlocal(sha1))
		return 0;

	/*
	 * If we already know the pack the object lives in, start checks from
	 * that pack - in the usual case when neither --local was given nor
	 * .keep files are present we will determine the answer right now.
	 */
	if (*found_pack) {
		want = want_found_object(exclude, *found_pack);
		if (want != -1)
			return want;
	}

	for (p = packed_git; p; p = p->next) {
		off_t offset;

		if (p == *found_pack)
			offset = *found_offset;
		else
			offset = find_pack_entry_one(sha1, p);

		if (offset) {
			if (!*found_pack) {
				if (!is_pack_valid(p))
					continue;
				*found_offset = offset;
				*found_pack = p;
			}
			want = want_found_object(exclude, p);
			if (want != -1)
				return want;
		}
	}

	return 1;
}

static void create_object_entry(const unsigned char *sha1,
				enum object_type type,
				uint32_t hash,
				int exclude,
				int no_try_delta,
				uint32_t index_pos,
				struct packed_git *found_pack,
				off_t found_offset)
{
	struct object_entry *entry;

	entry = packlist_alloc(&to_pack, sha1, index_pos);
	entry->hash = hash;
	if (type)
		entry->type = type;
	if (exclude)
		entry->preferred_base = 1;
	else
		nr_result++;
	if (found_pack) {
		entry->in_pack = found_pack;
		entry->in_pack_offset = found_offset;
	}

	entry->no_try_delta = no_try_delta;
}

static const char no_closure_warning[] = N_(
"disabling bitmap writing, as some objects are not being packed"
);

static int add_object_entry(const unsigned char *sha1, enum object_type type,
			    const char *name, int exclude)
{
	struct packed_git *found_pack = NULL;
	off_t found_offset = 0;
	uint32_t index_pos;

	if (have_duplicate_entry(sha1, exclude, &index_pos))
		return 0;

	if (!want_object_in_pack(sha1, exclude, &found_pack, &found_offset)) {
		/* The pack is missing an object, so it will not have closure */
		if (write_bitmap_index) {
			warning(_(no_closure_warning));
write_bitmap_index = 0;1084 }1085 return 0;1086 }10871088 create_object_entry(sha1, type, pack_name_hash(name),1089 exclude, name && no_try_delta(name),1090 index_pos, found_pack, found_offset);10911092 display_progress(progress_state, nr_result);1093 return 1;1094}10951096static int add_object_entry_from_bitmap(const unsigned char *sha1,1097 enum object_type type,1098 int flags, uint32_t name_hash,1099 struct packed_git *pack, off_t offset)1100{1101 uint32_t index_pos;11021103 if (have_duplicate_entry(sha1, 0, &index_pos))1104 return 0;11051106 if (!want_object_in_pack(sha1, 0, &pack, &offset))1107 return 0;11081109 create_object_entry(sha1, type, name_hash, 0, 0, index_pos, pack, offset);11101111 display_progress(progress_state, nr_result);1112 return 1;1113}11141115struct pbase_tree_cache {1116 unsigned char sha1[20];1117 int ref;1118 int temporary;1119 void *tree_data;1120 unsigned long tree_size;1121};11221123static struct pbase_tree_cache *(pbase_tree_cache[256]);1124static int pbase_tree_cache_ix(const unsigned char *sha1)1125{1126 return sha1[0] % ARRAY_SIZE(pbase_tree_cache);1127}1128static int pbase_tree_cache_ix_incr(int ix)1129{1130 return (ix+1) % ARRAY_SIZE(pbase_tree_cache);1131}11321133static struct pbase_tree {1134 struct pbase_tree *next;1135 /* This is a phony "cache" entry; we are not1136 * going to evict it or find it through _get()1137 * mechanism -- this is for the toplevel node that1138 * would almost always change with any commit.1139 */1140 struct pbase_tree_cache pcache;1141} *pbase_tree;11421143static struct pbase_tree_cache *pbase_tree_get(const unsigned char *sha1)1144{1145 struct pbase_tree_cache *ent, *nent;1146 void *data;1147 unsigned long size;1148 enum object_type type;1149 int neigh;1150 int my_ix = pbase_tree_cache_ix(sha1);1151 int available_ix = -1;11521153 /* pbase-tree-cache acts as a limited hashtable.1154 * your object will be found at your index or within a few1155 * slots after that slot if it is cached.1156 */1157 for (neigh = 0; neigh < 8; neigh++) {1158 ent = pbase_tree_cache[my_ix];1159 if (ent && !hashcmp(ent->sha1, sha1)) {1160 ent->ref++;1161 return ent;1162 }1163 else if (((available_ix < 0) && (!ent || !ent->ref)) ||1164 ((0 <= available_ix) &&1165 (!ent && pbase_tree_cache[available_ix])))1166 available_ix = my_ix;1167 if (!ent)1168 break;1169 my_ix = pbase_tree_cache_ix_incr(my_ix);1170 }11711172 /* Did not find one. 
Either we got a bogus request or1173 * we need to read and perhaps cache.1174 */1175 data = read_sha1_file(sha1, &type, &size);1176 if (!data)1177 return NULL;1178 if (type != OBJ_TREE) {1179 free(data);1180 return NULL;1181 }11821183 /* We need to either cache or return a throwaway copy */11841185 if (available_ix < 0)1186 ent = NULL;1187 else {1188 ent = pbase_tree_cache[available_ix];1189 my_ix = available_ix;1190 }11911192 if (!ent) {1193 nent = xmalloc(sizeof(*nent));1194 nent->temporary = (available_ix < 0);1195 }1196 else {1197 /* evict and reuse */1198 free(ent->tree_data);1199 nent = ent;1200 }1201 hashcpy(nent->sha1, sha1);1202 nent->tree_data = data;1203 nent->tree_size = size;1204 nent->ref = 1;1205 if (!nent->temporary)1206 pbase_tree_cache[my_ix] = nent;1207 return nent;1208}12091210static void pbase_tree_put(struct pbase_tree_cache *cache)1211{1212 if (!cache->temporary) {1213 cache->ref--;1214 return;1215 }1216 free(cache->tree_data);1217 free(cache);1218}12191220static int name_cmp_len(const char *name)1221{1222 int i;1223 for (i = 0; name[i] && name[i] != '\n' && name[i] != '/'; i++)1224 ;1225 return i;1226}12271228static void add_pbase_object(struct tree_desc *tree,1229 const char *name,1230 int cmplen,1231 const char *fullname)1232{1233 struct name_entry entry;1234 int cmp;12351236 while (tree_entry(tree,&entry)) {1237 if (S_ISGITLINK(entry.mode))1238 continue;1239 cmp = tree_entry_len(&entry) != cmplen ? 1 :1240 memcmp(name, entry.path, cmplen);1241 if (cmp > 0)1242 continue;1243 if (cmp < 0)1244 return;1245 if (name[cmplen] != '/') {1246 add_object_entry(entry.oid->hash,1247 object_type(entry.mode),1248 fullname, 1);1249 return;1250 }1251 if (S_ISDIR(entry.mode)) {1252 struct tree_desc sub;1253 struct pbase_tree_cache *tree;1254 const char *down = name+cmplen+1;1255 int downlen = name_cmp_len(down);12561257 tree = pbase_tree_get(entry.oid->hash);1258 if (!tree)1259 return;1260 init_tree_desc(&sub, tree->tree_data, tree->tree_size);12611262 add_pbase_object(&sub, down, downlen, fullname);1263 pbase_tree_put(tree);1264 }1265 }1266}12671268static unsigned *done_pbase_paths;1269static int done_pbase_paths_num;1270static int done_pbase_paths_alloc;1271static int done_pbase_path_pos(unsigned hash)1272{1273 int lo = 0;1274 int hi = done_pbase_paths_num;1275 while (lo < hi) {1276 int mi = (hi + lo) / 2;1277 if (done_pbase_paths[mi] == hash)1278 return mi;1279 if (done_pbase_paths[mi] < hash)1280 hi = mi;1281 else1282 lo = mi + 1;1283 }1284 return -lo-1;1285}12861287static int check_pbase_path(unsigned hash)1288{1289 int pos = (!done_pbase_paths) ? 
-1 : done_pbase_path_pos(hash);1290 if (0 <= pos)1291 return 1;1292 pos = -pos - 1;1293 ALLOC_GROW(done_pbase_paths,1294 done_pbase_paths_num + 1,1295 done_pbase_paths_alloc);1296 done_pbase_paths_num++;1297 if (pos < done_pbase_paths_num)1298 memmove(done_pbase_paths + pos + 1,1299 done_pbase_paths + pos,1300 (done_pbase_paths_num - pos - 1) * sizeof(unsigned));1301 done_pbase_paths[pos] = hash;1302 return 0;1303}13041305static void add_preferred_base_object(const char *name)1306{1307 struct pbase_tree *it;1308 int cmplen;1309 unsigned hash = pack_name_hash(name);13101311 if (!num_preferred_base || check_pbase_path(hash))1312 return;13131314 cmplen = name_cmp_len(name);1315 for (it = pbase_tree; it; it = it->next) {1316 if (cmplen == 0) {1317 add_object_entry(it->pcache.sha1, OBJ_TREE, NULL, 1);1318 }1319 else {1320 struct tree_desc tree;1321 init_tree_desc(&tree, it->pcache.tree_data, it->pcache.tree_size);1322 add_pbase_object(&tree, name, cmplen, name);1323 }1324 }1325}13261327static void add_preferred_base(unsigned char *sha1)1328{1329 struct pbase_tree *it;1330 void *data;1331 unsigned long size;1332 unsigned char tree_sha1[20];13331334 if (window <= num_preferred_base++)1335 return;13361337 data = read_object_with_reference(sha1, tree_type, &size, tree_sha1);1338 if (!data)1339 return;13401341 for (it = pbase_tree; it; it = it->next) {1342 if (!hashcmp(it->pcache.sha1, tree_sha1)) {1343 free(data);1344 return;1345 }1346 }13471348 it = xcalloc(1, sizeof(*it));1349 it->next = pbase_tree;1350 pbase_tree = it;13511352 hashcpy(it->pcache.sha1, tree_sha1);1353 it->pcache.tree_data = data;1354 it->pcache.tree_size = size;1355}13561357static void cleanup_preferred_base(void)1358{1359 struct pbase_tree *it;1360 unsigned i;13611362 it = pbase_tree;1363 pbase_tree = NULL;1364 while (it) {1365 struct pbase_tree *this = it;1366 it = this->next;1367 free(this->pcache.tree_data);1368 free(this);1369 }13701371 for (i = 0; i < ARRAY_SIZE(pbase_tree_cache); i++) {1372 if (!pbase_tree_cache[i])1373 continue;1374 free(pbase_tree_cache[i]->tree_data);1375 free(pbase_tree_cache[i]);1376 pbase_tree_cache[i] = NULL;1377 }13781379 free(done_pbase_paths);1380 done_pbase_paths = NULL;1381 done_pbase_paths_num = done_pbase_paths_alloc = 0;1382}13831384static void check_object(struct object_entry *entry)1385{1386 if (entry->in_pack) {1387 struct packed_git *p = entry->in_pack;1388 struct pack_window *w_curs = NULL;1389 const unsigned char *base_ref = NULL;1390 struct object_entry *base_entry;1391 unsigned long used, used_0;1392 unsigned long avail;1393 off_t ofs;1394 unsigned char *buf, c;13951396 buf = use_pack(p, &w_curs, entry->in_pack_offset, &avail);13971398 /*1399 * We want in_pack_type even if we do not reuse delta1400 * since non-delta representations could still be reused.1401 */1402 used = unpack_object_header_buffer(buf, avail,1403 &entry->in_pack_type,1404 &entry->size);1405 if (used == 0)1406 goto give_up;14071408 /*1409 * Determine if this is a delta and if so whether we can1410 * reuse it or not. Otherwise let's find out as cheaply as1411 * possible what the actual type and size for this object is.1412 */1413 switch (entry->in_pack_type) {1414 default:1415 /* Not a delta hence we've already got all we need. 
*/1416 entry->type = entry->in_pack_type;1417 entry->in_pack_header_size = used;1418 if (entry->type < OBJ_COMMIT || entry->type > OBJ_BLOB)1419 goto give_up;1420 unuse_pack(&w_curs);1421 return;1422 case OBJ_REF_DELTA:1423 if (reuse_delta && !entry->preferred_base)1424 base_ref = use_pack(p, &w_curs,1425 entry->in_pack_offset + used, NULL);1426 entry->in_pack_header_size = used + 20;1427 break;1428 case OBJ_OFS_DELTA:1429 buf = use_pack(p, &w_curs,1430 entry->in_pack_offset + used, NULL);1431 used_0 = 0;1432 c = buf[used_0++];1433 ofs = c & 127;1434 while (c & 128) {1435 ofs += 1;1436 if (!ofs || MSB(ofs, 7)) {1437 error("delta base offset overflow in pack for %s",1438 sha1_to_hex(entry->idx.sha1));1439 goto give_up;1440 }1441 c = buf[used_0++];1442 ofs = (ofs << 7) + (c & 127);1443 }1444 ofs = entry->in_pack_offset - ofs;1445 if (ofs <= 0 || ofs >= entry->in_pack_offset) {1446 error("delta base offset out of bound for %s",1447 sha1_to_hex(entry->idx.sha1));1448 goto give_up;1449 }1450 if (reuse_delta && !entry->preferred_base) {1451 struct revindex_entry *revidx;1452 revidx = find_pack_revindex(p, ofs);1453 if (!revidx)1454 goto give_up;1455 base_ref = nth_packed_object_sha1(p, revidx->nr);1456 }1457 entry->in_pack_header_size = used + used_0;1458 break;1459 }14601461 if (base_ref && (base_entry = packlist_find(&to_pack, base_ref, NULL))) {1462 /*1463 * If base_ref was set above that means we wish to1464 * reuse delta data, and we even found that base1465 * in the list of objects we want to pack. Goodie!1466 *1467 * Depth value does not matter - find_deltas() will1468 * never consider reused delta as the base object to1469 * deltify other objects against, in order to avoid1470 * circular deltas.1471 */1472 entry->type = entry->in_pack_type;1473 entry->delta = base_entry;1474 entry->delta_size = entry->size;1475 entry->delta_sibling = base_entry->delta_child;1476 base_entry->delta_child = entry;1477 unuse_pack(&w_curs);1478 return;1479 }14801481 if (entry->type) {1482 /*1483 * This must be a delta and we already know what the1484 * final object type is. Let's extract the actual1485 * object size from the delta header.1486 */1487 entry->size = get_size_from_delta(p, &w_curs,1488 entry->in_pack_offset + entry->in_pack_header_size);1489 if (entry->size == 0)1490 goto give_up;1491 unuse_pack(&w_curs);1492 return;1493 }14941495 /*1496 * No choice but to fall back to the recursive delta walk1497 * with sha1_object_info() to find about the object type1498 * at this point...1499 */1500 give_up:1501 unuse_pack(&w_curs);1502 }15031504 entry->type = sha1_object_info(entry->idx.sha1, &entry->size);1505 /*1506 * The error condition is checked in prepare_pack(). This is1507 * to permit a missing preferred base object to be ignored1508 * as a preferred base. Doing so can result in a larger1509 * pack file, but the transfer will still take place.1510 */1511}15121513static int pack_offset_sort(const void *_a, const void *_b)1514{1515 const struct object_entry *a = *(struct object_entry **)_a;1516 const struct object_entry *b = *(struct object_entry **)_b;15171518 /* avoid filesystem trashing with loose objects */1519 if (!a->in_pack && !b->in_pack)1520 return hashcmp(a->idx.sha1, b->idx.sha1);15211522 if (a->in_pack < b->in_pack)1523 return -1;1524 if (a->in_pack > b->in_pack)1525 return 1;1526 return a->in_pack_offset < b->in_pack_offset ? 
-1 :1527 (a->in_pack_offset > b->in_pack_offset);1528}15291530static void get_object_details(void)1531{1532 uint32_t i;1533 struct object_entry **sorted_by_offset;15341535 sorted_by_offset = xcalloc(to_pack.nr_objects, sizeof(struct object_entry *));1536 for (i = 0; i < to_pack.nr_objects; i++)1537 sorted_by_offset[i] = to_pack.objects + i;1538 qsort(sorted_by_offset, to_pack.nr_objects, sizeof(*sorted_by_offset), pack_offset_sort);15391540 for (i = 0; i < to_pack.nr_objects; i++) {1541 struct object_entry *entry = sorted_by_offset[i];1542 check_object(entry);1543 if (big_file_threshold < entry->size)1544 entry->no_try_delta = 1;1545 }15461547 free(sorted_by_offset);1548}15491550/*1551 * We search for deltas in a list sorted by type, by filename hash, and then1552 * by size, so that we see progressively smaller and smaller files.1553 * That's because we prefer deltas to be from the bigger file1554 * to the smaller -- deletes are potentially cheaper, but perhaps1555 * more importantly, the bigger file is likely the more recent1556 * one. The deepest deltas are therefore the oldest objects which are1557 * less susceptible to be accessed often.1558 */1559static int type_size_sort(const void *_a, const void *_b)1560{1561 const struct object_entry *a = *(struct object_entry **)_a;1562 const struct object_entry *b = *(struct object_entry **)_b;15631564 if (a->type > b->type)1565 return -1;1566 if (a->type < b->type)1567 return 1;1568 if (a->hash > b->hash)1569 return -1;1570 if (a->hash < b->hash)1571 return 1;1572 if (a->preferred_base > b->preferred_base)1573 return -1;1574 if (a->preferred_base < b->preferred_base)1575 return 1;1576 if (a->size > b->size)1577 return -1;1578 if (a->size < b->size)1579 return 1;1580 return a < b ? -1 : (a > b); /* newest first */1581}15821583struct unpacked {1584 struct object_entry *entry;1585 void *data;1586 struct delta_index *index;1587 unsigned depth;1588};15891590static int delta_cacheable(unsigned long src_size, unsigned long trg_size,1591 unsigned long delta_size)1592{1593 if (max_delta_cache_size && delta_cache_size + delta_size > max_delta_cache_size)1594 return 0;15951596 if (delta_size < cache_max_small_delta_size)1597 return 1;15981599 /* cache delta, if objects are large enough compared to delta size */1600 if ((src_size >> 20) + (trg_size >> 21) > (delta_size >> 10))1601 return 1;16021603 return 0;1604}16051606#ifndef NO_PTHREADS16071608static pthread_mutex_t read_mutex;1609#define read_lock() pthread_mutex_lock(&read_mutex)1610#define read_unlock() pthread_mutex_unlock(&read_mutex)16111612static pthread_mutex_t cache_mutex;1613#define cache_lock() pthread_mutex_lock(&cache_mutex)1614#define cache_unlock() pthread_mutex_unlock(&cache_mutex)16151616static pthread_mutex_t progress_mutex;1617#define progress_lock() pthread_mutex_lock(&progress_mutex)1618#define progress_unlock() pthread_mutex_unlock(&progress_mutex)16191620#else16211622#define read_lock() (void)01623#define read_unlock() (void)01624#define cache_lock() (void)01625#define cache_unlock() (void)01626#define progress_lock() (void)01627#define progress_unlock() (void)016281629#endif16301631static int try_delta(struct unpacked *trg, struct unpacked *src,1632 unsigned max_depth, unsigned long *mem_usage)1633{1634 struct object_entry *trg_entry = trg->entry;1635 struct object_entry *src_entry = src->entry;1636 unsigned long trg_size, src_size, delta_size, sizediff, max_size, sz;1637 unsigned ref_depth;1638 enum object_type type;1639 void *delta_buf;16401641 /* Don't bother doing diffs 
between different types */1642 if (trg_entry->type != src_entry->type)1643 return -1;16441645 /*1646 * We do not bother to try a delta that we discarded on an1647 * earlier try, but only when reusing delta data. Note that1648 * src_entry that is marked as the preferred_base should always1649 * be considered, as even if we produce a suboptimal delta against1650 * it, we will still save the transfer cost, as we already know1651 * the other side has it and we won't send src_entry at all.1652 */1653 if (reuse_delta && trg_entry->in_pack &&1654 trg_entry->in_pack == src_entry->in_pack &&1655 !src_entry->preferred_base &&1656 trg_entry->in_pack_type != OBJ_REF_DELTA &&1657 trg_entry->in_pack_type != OBJ_OFS_DELTA)1658 return 0;16591660 /* Let's not bust the allowed depth. */1661 if (src->depth >= max_depth)1662 return 0;16631664 /* Now some size filtering heuristics. */1665 trg_size = trg_entry->size;1666 if (!trg_entry->delta) {1667 max_size = trg_size/2 - 20;1668 ref_depth = 1;1669 } else {1670 max_size = trg_entry->delta_size;1671 ref_depth = trg->depth;1672 }1673 max_size = (uint64_t)max_size * (max_depth - src->depth) /1674 (max_depth - ref_depth + 1);1675 if (max_size == 0)1676 return 0;1677 src_size = src_entry->size;1678 sizediff = src_size < trg_size ? trg_size - src_size : 0;1679 if (sizediff >= max_size)1680 return 0;1681 if (trg_size < src_size / 32)1682 return 0;16831684 /* Load data if not already done */1685 if (!trg->data) {1686 read_lock();1687 trg->data = read_sha1_file(trg_entry->idx.sha1, &type, &sz);1688 read_unlock();1689 if (!trg->data)1690 die("object %s cannot be read",1691 sha1_to_hex(trg_entry->idx.sha1));1692 if (sz != trg_size)1693 die("object %s inconsistent object length (%lu vs %lu)",1694 sha1_to_hex(trg_entry->idx.sha1), sz, trg_size);1695 *mem_usage += sz;1696 }1697 if (!src->data) {1698 read_lock();1699 src->data = read_sha1_file(src_entry->idx.sha1, &type, &sz);1700 read_unlock();1701 if (!src->data) {1702 if (src_entry->preferred_base) {1703 static int warned = 0;1704 if (!warned++)1705 warning("object %s cannot be read",1706 sha1_to_hex(src_entry->idx.sha1));1707 /*1708 * Those objects are not included in the1709 * resulting pack. Be resilient and ignore1710 * them if they can't be read, in case the1711 * pack could be created nevertheless.1712 */1713 return 0;1714 }1715 die("object %s cannot be read",1716 sha1_to_hex(src_entry->idx.sha1));1717 }1718 if (sz != src_size)1719 die("object %s inconsistent object length (%lu vs %lu)",1720 sha1_to_hex(src_entry->idx.sha1), sz, src_size);1721 *mem_usage += sz;1722 }1723 if (!src->index) {1724 src->index = create_delta_index(src->data, src_size);1725 if (!src->index) {1726 static int warned = 0;1727 if (!warned++)1728 warning("suboptimal pack - out of memory");1729 return 0;1730 }1731 *mem_usage += sizeof_delta_index(src->index);1732 }17331734 delta_buf = create_delta(src->index, trg->data, trg_size, &delta_size, max_size);1735 if (!delta_buf)1736 return 0;17371738 if (trg_entry->delta) {1739 /* Prefer only shallower same-sized deltas. */1740 if (delta_size == trg_entry->delta_size &&1741 src->depth + 1 >= trg->depth) {1742 free(delta_buf);1743 return 0;1744 }1745 }17461747 /*1748 * Handle memory allocation outside of the cache1749 * accounting lock. 
Compiler will optimize the strangeness1750 * away when NO_PTHREADS is defined.1751 */1752 free(trg_entry->delta_data);1753 cache_lock();1754 if (trg_entry->delta_data) {1755 delta_cache_size -= trg_entry->delta_size;1756 trg_entry->delta_data = NULL;1757 }1758 if (delta_cacheable(src_size, trg_size, delta_size)) {1759 delta_cache_size += delta_size;1760 cache_unlock();1761 trg_entry->delta_data = xrealloc(delta_buf, delta_size);1762 } else {1763 cache_unlock();1764 free(delta_buf);1765 }17661767 trg_entry->delta = src_entry;1768 trg_entry->delta_size = delta_size;1769 trg->depth = src->depth + 1;17701771 return 1;1772}17731774static unsigned int check_delta_limit(struct object_entry *me, unsigned int n)1775{1776 struct object_entry *child = me->delta_child;1777 unsigned int m = n;1778 while (child) {1779 unsigned int c = check_delta_limit(child, n + 1);1780 if (m < c)1781 m = c;1782 child = child->delta_sibling;1783 }1784 return m;1785}17861787static unsigned long free_unpacked(struct unpacked *n)1788{1789 unsigned long freed_mem = sizeof_delta_index(n->index);1790 free_delta_index(n->index);1791 n->index = NULL;1792 if (n->data) {1793 freed_mem += n->entry->size;1794 free(n->data);1795 n->data = NULL;1796 }1797 n->entry = NULL;1798 n->depth = 0;1799 return freed_mem;1800}18011802static void find_deltas(struct object_entry **list, unsigned *list_size,1803 int window, int depth, unsigned *processed)1804{1805 uint32_t i, idx = 0, count = 0;1806 struct unpacked *array;1807 unsigned long mem_usage = 0;18081809 array = xcalloc(window, sizeof(struct unpacked));18101811 for (;;) {1812 struct object_entry *entry;1813 struct unpacked *n = array + idx;1814 int j, max_depth, best_base = -1;18151816 progress_lock();1817 if (!*list_size) {1818 progress_unlock();1819 break;1820 }1821 entry = *list++;1822 (*list_size)--;1823 if (!entry->preferred_base) {1824 (*processed)++;1825 display_progress(progress_state, *processed);1826 }1827 progress_unlock();18281829 mem_usage -= free_unpacked(n);1830 n->entry = entry;18311832 while (window_memory_limit &&1833 mem_usage > window_memory_limit &&1834 count > 1) {1835 uint32_t tail = (idx + window - count) % window;1836 mem_usage -= free_unpacked(array + tail);1837 count--;1838 }18391840 /* We do not compute delta to *create* objects we are not1841 * going to pack.1842 */1843 if (entry->preferred_base)1844 goto next;18451846 /*1847 * If the current object is at pack edge, take the depth the1848 * objects that depend on the current object into account1849 * otherwise they would become too deep.1850 */1851 max_depth = depth;1852 if (entry->delta_child) {1853 max_depth -= check_delta_limit(entry, 0);1854 if (max_depth <= 0)1855 goto next;1856 }18571858 j = window;1859 while (--j > 0) {1860 int ret;1861 uint32_t other_idx = idx + j;1862 struct unpacked *m;1863 if (other_idx >= window)1864 other_idx -= window;1865 m = array + other_idx;1866 if (!m->entry)1867 break;1868 ret = try_delta(n, m, max_depth, &mem_usage);1869 if (ret < 0)1870 break;1871 else if (ret > 0)1872 best_base = other_idx;1873 }18741875 /*1876 * If we decided to cache the delta data, then it is best1877 * to compress it right away. 
First because we have to do1878 * it anyway, and doing it here while we're threaded will1879 * save a lot of time in the non threaded write phase,1880 * as well as allow for caching more deltas within1881 * the same cache size limit.1882 * ...1883 * But only if not writing to stdout, since in that case1884 * the network is most likely throttling writes anyway,1885 * and therefore it is best to go to the write phase ASAP1886 * instead, as we can afford spending more time compressing1887 * between writes at that moment.1888 */1889 if (entry->delta_data && !pack_to_stdout) {1890 entry->z_delta_size = do_compress(&entry->delta_data,1891 entry->delta_size);1892 cache_lock();1893 delta_cache_size -= entry->delta_size;1894 delta_cache_size += entry->z_delta_size;1895 cache_unlock();1896 }18971898 /* if we made n a delta, and if n is already at max1899 * depth, leaving it in the window is pointless. we1900 * should evict it first.1901 */1902 if (entry->delta && max_depth <= n->depth)1903 continue;19041905 /*1906 * Move the best delta base up in the window, after the1907 * currently deltified object, to keep it longer. It will1908 * be the first base object to be attempted next.1909 */1910 if (entry->delta) {1911 struct unpacked swap = array[best_base];1912 int dist = (window + idx - best_base) % window;1913 int dst = best_base;1914 while (dist--) {1915 int src = (dst + 1) % window;1916 array[dst] = array[src];1917 dst = src;1918 }1919 array[dst] = swap;1920 }19211922 next:1923 idx++;1924 if (count + 1 < window)1925 count++;1926 if (idx >= window)1927 idx = 0;1928 }19291930 for (i = 0; i < window; ++i) {1931 free_delta_index(array[i].index);1932 free(array[i].data);1933 }1934 free(array);1935}19361937#ifndef NO_PTHREADS19381939static void try_to_free_from_threads(size_t size)1940{1941 read_lock();1942 release_pack_memory(size);1943 read_unlock();1944}19451946static try_to_free_t old_try_to_free_routine;19471948/*1949 * The main thread waits on the condition that (at least) one of the workers1950 * has stopped working (which is indicated in the .working member of1951 * struct thread_params).1952 * When a work thread has completed its work, it sets .working to 0 and1953 * signals the main thread and waits on the condition that .data_ready1954 * becomes 1.1955 */19561957struct thread_params {1958 pthread_t thread;1959 struct object_entry **list;1960 unsigned list_size;1961 unsigned remaining;1962 int window;1963 int depth;1964 int working;1965 int data_ready;1966 pthread_mutex_t mutex;1967 pthread_cond_t cond;1968 unsigned *processed;1969};19701971static pthread_cond_t progress_cond;19721973/*1974 * Mutex and conditional variable can't be statically-initialized on Windows.1975 */1976static void init_threaded_search(void)1977{1978 init_recursive_mutex(&read_mutex);1979 pthread_mutex_init(&cache_mutex, NULL);1980 pthread_mutex_init(&progress_mutex, NULL);1981 pthread_cond_init(&progress_cond, NULL);1982 old_try_to_free_routine = set_try_to_free_routine(try_to_free_from_threads);1983}19841985static void cleanup_threaded_search(void)1986{1987 set_try_to_free_routine(old_try_to_free_routine);1988 pthread_cond_destroy(&progress_cond);1989 pthread_mutex_destroy(&read_mutex);1990 pthread_mutex_destroy(&cache_mutex);1991 pthread_mutex_destroy(&progress_mutex);1992}19931994static void *threaded_find_deltas(void *arg)1995{1996 struct thread_params *me = arg;19971998 while (me->remaining) {1999 find_deltas(me->list, &me->remaining,2000 me->window, me->depth, me->processed);20012002 progress_lock();2003 
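		/*
		 * Flag ourselves as idle and wake the main thread; it will
		 * assign this thread a new slice of the list (possibly an
		 * empty one, meaning we should exit) and then raise
		 * data_ready, as described in the comment above
		 * struct thread_params.
		 */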
me->working = 0;2004 pthread_cond_signal(&progress_cond);2005 progress_unlock();20062007 /*2008 * We must not set ->data_ready before we wait on the2009 * condition because the main thread may have set it to 12010 * before we get here. In order to be sure that new2011 * work is available if we see 1 in ->data_ready, it2012 * was initialized to 0 before this thread was spawned2013 * and we reset it to 0 right away.2014 */2015 pthread_mutex_lock(&me->mutex);2016 while (!me->data_ready)2017 pthread_cond_wait(&me->cond, &me->mutex);2018 me->data_ready = 0;2019 pthread_mutex_unlock(&me->mutex);2020 }2021 /* leave ->working 1 so that this doesn't get more work assigned */2022 return NULL;2023}20242025static void ll_find_deltas(struct object_entry **list, unsigned list_size,2026 int window, int depth, unsigned *processed)2027{2028 struct thread_params *p;2029 int i, ret, active_threads = 0;20302031 init_threaded_search();20322033 if (delta_search_threads <= 1) {2034 find_deltas(list, &list_size, window, depth, processed);2035 cleanup_threaded_search();2036 return;2037 }2038 if (progress > pack_to_stdout)2039 fprintf(stderr, "Delta compression using up to %d threads.\n",2040 delta_search_threads);2041 p = xcalloc(delta_search_threads, sizeof(*p));20422043 /* Partition the work amongst work threads. */2044 for (i = 0; i < delta_search_threads; i++) {2045 unsigned sub_size = list_size / (delta_search_threads - i);20462047 /* don't use too small segments or no deltas will be found */2048 if (sub_size < 2*window && i+1 < delta_search_threads)2049 sub_size = 0;20502051 p[i].window = window;2052 p[i].depth = depth;2053 p[i].processed = processed;2054 p[i].working = 1;2055 p[i].data_ready = 0;20562057 /* try to split chunks on "path" boundaries */2058 while (sub_size && sub_size < list_size &&2059 list[sub_size]->hash &&2060 list[sub_size]->hash == list[sub_size-1]->hash)2061 sub_size++;20622063 p[i].list = list;2064 p[i].list_size = sub_size;2065 p[i].remaining = sub_size;20662067 list += sub_size;2068 list_size -= sub_size;2069 }20702071 /* Start work threads. */2072 for (i = 0; i < delta_search_threads; i++) {2073 if (!p[i].list_size)2074 continue;2075 pthread_mutex_init(&p[i].mutex, NULL);2076 pthread_cond_init(&p[i].cond, NULL);2077 ret = pthread_create(&p[i].thread, NULL,2078 threaded_find_deltas, &p[i]);2079 if (ret)2080 die("unable to create thread: %s", strerror(ret));2081 active_threads++;2082 }20832084 /*2085 * Now let's wait for work completion. Each time a thread is done2086 * with its work, we steal half of the remaining work from the2087 * thread with the largest number of unprocessed objects and give2088 * it to that newly idle thread. 
	 * it to that newly idle thread.  This ensures good load balancing
	 * until the remaining object list segments are simply too short
	 * to be worth splitting anymore.
	 */
	while (active_threads) {
		struct thread_params *target = NULL;
		struct thread_params *victim = NULL;
		unsigned sub_size = 0;

		progress_lock();
		for (;;) {
			for (i = 0; !target && i < delta_search_threads; i++)
				if (!p[i].working)
					target = &p[i];
			if (target)
				break;
			pthread_cond_wait(&progress_cond, &progress_mutex);
		}

		for (i = 0; i < delta_search_threads; i++)
			if (p[i].remaining > 2*window &&
			    (!victim || victim->remaining < p[i].remaining))
				victim = &p[i];
		if (victim) {
			sub_size = victim->remaining / 2;
			list = victim->list + victim->list_size - sub_size;
			while (sub_size && list[0]->hash &&
			       list[0]->hash == list[-1]->hash) {
				list++;
				sub_size--;
			}
			if (!sub_size) {
				/*
				 * It is possible for some "paths" to have
				 * so many objects that no hash boundary
				 * might be found.  Let's just steal the
				 * exact half in that case.
				 */
				sub_size = victim->remaining / 2;
				list -= sub_size;
			}
			target->list = list;
			victim->list_size -= sub_size;
			victim->remaining -= sub_size;
		}
		target->list_size = sub_size;
		target->remaining = sub_size;
		target->working = 1;
		progress_unlock();

		pthread_mutex_lock(&target->mutex);
		target->data_ready = 1;
		pthread_cond_signal(&target->cond);
		pthread_mutex_unlock(&target->mutex);

		if (!sub_size) {
			pthread_join(target->thread, NULL);
			pthread_cond_destroy(&target->cond);
			pthread_mutex_destroy(&target->mutex);
			active_threads--;
		}
	}
	cleanup_threaded_search();
	free(p);
}

#else
#define ll_find_deltas(l, s, w, d, p)	find_deltas(l, &s, w, d, p)
#endif

static void add_tag_chain(const struct object_id *oid)
{
	struct tag *tag;

	/*
	 * We catch duplicates already in add_object_entry(), but we'd
	 * prefer to do this extra check to avoid having to parse the
	 * tag at all if we already know that it's being packed (e.g., if
	 * it was included via bitmaps, we would not have parsed it
	 * previously).
	 */
	if (packlist_find(&to_pack, oid->hash, NULL))
		return;

	tag = lookup_tag(oid->hash);
	while (1) {
		if (!tag || parse_tag(tag) || !tag->tagged)
			die("unable to pack objects reachable from tag %s",
			    oid_to_hex(oid));

		add_object_entry(tag->object.oid.hash, OBJ_TAG, NULL, 0);

		if (tag->tagged->type != OBJ_TAG)
			return;

		tag = (struct tag *)tag->tagged;
	}
}

static int add_ref_tag(const char *path, const struct object_id *oid, int flag, void *cb_data)
{
	struct object_id peeled;

	if (starts_with(path, "refs/tags/") &&		/* is a tag? */
	    !peel_ref(path, peeled.hash) &&		/* peelable? */
	    packlist_find(&to_pack, peeled.hash, NULL))	/* object packed? */
		add_tag_chain(oid);
	return 0;
}

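/*
 * Collect the objects that are candidates for delta compression and run
 * the (possibly multi-threaded) delta search over them.
 */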
static void prepare_pack(int window, int depth)
{
	struct object_entry **delta_list;
	uint32_t i, nr_deltas;
	unsigned n;

	get_object_details();

	/*
	 * If we're locally repacking then we need to be doubly careful
	 * from now on in order to make sure no stealth corruption gets
	 * propagated to the new pack.  Clients receiving streamed packs
	 * should validate everything they get anyway so no need to incur
	 * the additional cost here in that case.
	 */
	if (!pack_to_stdout)
		do_check_packed_object_crc = 1;

	if (!to_pack.nr_objects || !window || !depth)
		return;

	ALLOC_ARRAY(delta_list, to_pack.nr_objects);
	nr_deltas = n = 0;

	for (i = 0; i < to_pack.nr_objects; i++) {
		struct object_entry *entry = to_pack.objects + i;

		if (entry->delta)
			/* This happens if we decided to reuse an existing
			 * delta from a pack.  "reuse_delta &&" is implied.
			 */
			continue;

		if (entry->size < 50)
			continue;

		if (entry->no_try_delta)
			continue;

		if (!entry->preferred_base) {
			nr_deltas++;
			if (entry->type < 0)
				die("unable to get type of object %s",
				    sha1_to_hex(entry->idx.sha1));
		} else {
			if (entry->type < 0) {
				/*
				 * This object was not found, but we
				 * don't have to include it anyway.
				 */
				continue;
			}
		}

		delta_list[n++] = entry;
	}

	if (nr_deltas && n > 1) {
		unsigned nr_done = 0;
		if (progress)
			progress_state = start_progress(_("Compressing objects"),
							nr_deltas);
		qsort(delta_list, n, sizeof(*delta_list), type_size_sort);
		ll_find_deltas(delta_list, n, window+1, depth, &nr_done);
		stop_progress(&progress_state);
		if (nr_done != nr_deltas)
			die("inconsistency with delta count");
	}
	free(delta_list);
}

static int git_pack_config(const char *k, const char *v, void *cb)
{
	if (!strcmp(k, "pack.window")) {
		window = git_config_int(k, v);
		return 0;
	}
	if (!strcmp(k, "pack.windowmemory")) {
		window_memory_limit = git_config_ulong(k, v);
		return 0;
	}
	if (!strcmp(k, "pack.depth")) {
		depth = git_config_int(k, v);
		return 0;
	}
	if (!strcmp(k, "pack.compression")) {
		int level = git_config_int(k, v);
		if (level == -1)
			level = Z_DEFAULT_COMPRESSION;
		else if (level < 0 || level > Z_BEST_COMPRESSION)
			die("bad pack compression level %d", level);
		pack_compression_level = level;
		pack_compression_seen = 1;
		return 0;
	}
	if (!strcmp(k, "pack.deltacachesize")) {
		max_delta_cache_size = git_config_int(k, v);
		return 0;
	}
	if (!strcmp(k, "pack.deltacachelimit")) {
		cache_max_small_delta_size = git_config_int(k, v);
		return 0;
	}
	if (!strcmp(k, "pack.writebitmaphashcache")) {
		if (git_config_bool(k, v))
			write_bitmap_options |= BITMAP_OPT_HASH_CACHE;
		else
			write_bitmap_options &= ~BITMAP_OPT_HASH_CACHE;
	}
	if (!strcmp(k, "pack.usebitmaps")) {
		use_bitmap_index_default = git_config_bool(k, v);
		return 0;
	}
	if (!strcmp(k, "pack.threads")) {
		delta_search_threads = git_config_int(k, v);
		if (delta_search_threads < 0)
			die("invalid number of threads specified (%d)",
			    delta_search_threads);
#ifdef NO_PTHREADS
		if (delta_search_threads != 1)
			warning("no threads support, ignoring %s", k);
#endif
		return 0;
	}
	if (!strcmp(k, "pack.indexversion")) {
		pack_idx_opts.version = git_config_int(k, v);
		if (pack_idx_opts.version > 2)
			die("bad pack.indexversion=%"PRIu32,
			    pack_idx_opts.version);
		return 0;
	}
	return git_default_config(k, v, cb);
}

static void read_object_list_from_stdin(void)
{
	char line[40 + 1 + PATH_MAX + 2];
	unsigned char sha1[20];

	for (;;) {
		if (!fgets(line, sizeof(line), stdin)) {
			if (feof(stdin))
				break;
			if (!ferror(stdin))
				die("fgets returned NULL, not EOF, not error!");
			if (errno != EINTR)
				die_errno("fgets");
			clearerr(stdin);
			continue;
		}
		if (line[0] == '-') {
			if (get_sha1_hex(line+1, sha1))
				die("expected edge sha1, got garbage:\n %s",
				    line);
			add_preferred_base(sha1);
			continue;
		}
		if (get_sha1_hex(line, sha1))
			die("expected sha1, got garbage:\n %s", line);

		add_preferred_base_object(line+41);
		add_object_entry(sha1, 0, line+41, 0);
	}
}

#define OBJECT_ADDED (1u<<20)

static void show_commit(struct commit *commit, void *data)
{
	add_object_entry(commit->object.oid.hash, OBJ_COMMIT, NULL, 0);
	commit->object.flags |= OBJECT_ADDED;

	if (write_bitmap_index)
		index_commit_for_bitmap(commit);
}

static void show_object(struct object *obj, const char *name, void *data)
{
	add_preferred_base_object(name);
	add_object_entry(obj->oid.hash, obj->type, name, 0);
	obj->flags |= OBJECT_ADDED;
}

static void show_edge(struct commit *commit)
{
	add_preferred_base(commit->object.oid.hash);
}

struct in_pack_object {
	off_t offset;
	struct object *object;
};

struct in_pack {
	int alloc;
	int nr;
	struct in_pack_object *array;
};

static void mark_in_pack_object(struct object *object, struct packed_git *p, struct in_pack *in_pack)
{
	in_pack->array[in_pack->nr].offset = find_pack_entry_one(object->oid.hash, p);
	in_pack->array[in_pack->nr].object = object;
	in_pack->nr++;
}

/*
 * Compare the objects in the offset order, in order to emulate the
 * "git rev-list --objects" output that produced the pack originally.
 */
static int ofscmp(const void *a_, const void *b_)
{
	struct in_pack_object *a = (struct in_pack_object *)a_;
	struct in_pack_object *b = (struct in_pack_object *)b_;

	if (a->offset < b->offset)
		return -1;
	else if (a->offset > b->offset)
		return 1;
	else
		return oidcmp(&a->object->oid, &b->object->oid);
}

static void add_objects_in_unpacked_packs(struct rev_info *revs)
{
	struct packed_git *p;
	struct in_pack in_pack;
	uint32_t i;

	memset(&in_pack, 0, sizeof(in_pack));

	for (p = packed_git; p; p = p->next) {
		const unsigned char *sha1;
		struct object *o;

		if (!p->pack_local || p->pack_keep)
			continue;
		if (open_pack_index(p))
			die("cannot open pack index");

		ALLOC_GROW(in_pack.array,
			   in_pack.nr + p->num_objects,
			   in_pack.alloc);

		for (i = 0; i < p->num_objects; i++) {
			sha1 = nth_packed_object_sha1(p, i);
			o = lookup_unknown_object(sha1);
			if (!(o->flags & OBJECT_ADDED))
				mark_in_pack_object(o, p, &in_pack);
			o->flags |= OBJECT_ADDED;
		}
	}

	if (in_pack.nr) {
		qsort(in_pack.array, in_pack.nr, sizeof(in_pack.array[0]),
		      ofscmp);
		for (i = 0; i < in_pack.nr; i++) {
			struct object *o = in_pack.array[i].object;
			add_object_entry(o->oid.hash, o->type, "", 0);
		}
	}
	free(in_pack.array);
}

static int add_loose_object(const unsigned char *sha1, const char *path,
			    void *data)
{
	enum object_type type = sha1_object_info(sha1, NULL);

	if (type < 0) {
		warning("loose object at %s could not be examined", path);
		return 0;
	}

	add_object_entry(sha1, type, "", 0);
	return 0;
}

/*
 * We actually don't even have to worry about reachability here.
 * add_object_entry will weed out duplicates, so we just add every
 * loose object we find.
 */
static void add_unreachable_loose_objects(void)
{
	for_each_loose_file_in_objdir(get_object_directory(),
				      add_loose_object,
				      NULL, NULL, NULL);
}

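/*
 * Does the object exist in a .keep'd or non-local pack?  The static
 * last_found pointer remembers the pack that matched on the previous
 * call, since consecutive lookups often hit the same pack; the sentinel
 * value (void *)1 means "no previous match".
 */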
static int has_sha1_pack_kept_or_nonlocal(const unsigned char *sha1)
{
	static struct packed_git *last_found = (void *)1;
	struct packed_git *p;

	p = (last_found != (void *)1) ? last_found : packed_git;

	while (p) {
		if ((!p->pack_local || p->pack_keep) &&
		    find_pack_entry_one(sha1, p)) {
			last_found = p;
			return 1;
		}
		if (p == last_found)
			p = packed_git;
		else
			p = p->next;
		if (p == last_found)
			p = p->next;
	}
	return 0;
}

/*
 * Store a list of sha1s that should not be discarded because they are
 * either written too recently, or are reachable from another object
 * that was.
 *
 * This is filled by get_object_list.
 */
static struct sha1_array recent_objects;

static int loosened_object_can_be_discarded(const unsigned char *sha1,
					    unsigned long mtime)
{
	if (!unpack_unreachable_expiration)
		return 0;
	if (mtime > unpack_unreachable_expiration)
		return 0;
	if (sha1_array_lookup(&recent_objects, sha1) >= 0)
		return 0;
	return 1;
}

static void loosen_unused_packed_objects(struct rev_info *revs)
{
	struct packed_git *p;
	uint32_t i;
	const unsigned char *sha1;

	for (p = packed_git; p; p = p->next) {
		if (!p->pack_local || p->pack_keep)
			continue;

		if (open_pack_index(p))
			die("cannot open pack index");

		for (i = 0; i < p->num_objects; i++) {
			sha1 = nth_packed_object_sha1(p, i);
			if (!packlist_find(&to_pack, sha1, NULL) &&
			    !has_sha1_pack_kept_or_nonlocal(sha1) &&
			    !loosened_object_can_be_discarded(sha1, p->mtime))
				if (force_object_loose(sha1, p->mtime))
					die("unable to force loose object");
		}
	}
}

/*
 * This tracks any options which pack-reuse code expects to be on, or which a
 * reader of the pack might not understand, and which would therefore prevent
 * blind reuse of what we have on disk.
 */
static int pack_options_allow_reuse(void)
{
	return pack_to_stdout && allow_ofs_delta;
}

static int get_object_list_from_bitmap(struct rev_info *revs)
{
	if (prepare_bitmap_walk(revs) < 0)
		return -1;

	if (pack_options_allow_reuse() &&
	    !reuse_partial_packfile_from_bitmap(
			&reuse_packfile,
			&reuse_packfile_objects,
			&reuse_packfile_offset)) {
		assert(reuse_packfile_objects);
		nr_result += reuse_packfile_objects;
		display_progress(progress_state, nr_result);
	}

	traverse_bitmap_commit_list(&add_object_entry_from_bitmap);
	return 0;
}

static void record_recent_object(struct object *obj,
				 const char *name,
				 void *data)
{
	sha1_array_append(&recent_objects, obj->oid.hash);
}

static void record_recent_commit(struct commit *commit, void *data)
{
	sha1_array_append(&recent_objects, commit->object.oid.hash);
}

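/*
 * Collect the objects to pack via an internal rev-list walk: revision
 * arguments come from the caller and, line by line, from stdin; the
 * bitmap index is used as a shortcut when it is enabled and usable.
 */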
static void get_object_list(int ac, const char **av)
{
	struct rev_info revs;
	char line[1000];
	int flags = 0;

	init_revisions(&revs, NULL);
	save_commit_buffer = 0;
	setup_revisions(ac, av, &revs, NULL);

	/* make sure shallows are read */
	is_repository_shallow();

	while (fgets(line, sizeof(line), stdin) != NULL) {
		int len = strlen(line);
		if (len && line[len - 1] == '\n')
			line[--len] = 0;
		if (!len)
			break;
		if (*line == '-') {
			if (!strcmp(line, "--not")) {
				flags ^= UNINTERESTING;
				write_bitmap_index = 0;
				continue;
			}
			if (starts_with(line, "--shallow ")) {
				unsigned char sha1[20];
				if (get_sha1_hex(line + 10, sha1))
					die("not an SHA-1 '%s'", line + 10);
				register_shallow(sha1);
				use_bitmap_index = 0;
				continue;
			}
			die("not a rev '%s'", line);
		}
		if (handle_revision_arg(line, &revs, flags, REVARG_CANNOT_BE_FILENAME))
			die("bad revision '%s'", line);
	}

	if (use_bitmap_index && !get_object_list_from_bitmap(&revs))
		return;

	if (prepare_revision_walk(&revs))
		die("revision walk setup failed");
	mark_edges_uninteresting(&revs, show_edge);
	traverse_commit_list(&revs, show_commit, show_object, NULL);

	if (unpack_unreachable_expiration) {
		revs.ignore_missing_links = 1;
		if (add_unseen_recent_objects_to_traversal(&revs,
				unpack_unreachable_expiration))
			die("unable to add recent objects");
		if (prepare_revision_walk(&revs))
			die("revision walk setup failed");
		traverse_commit_list(&revs, record_recent_commit,
				     record_recent_object, NULL);
	}

	if (keep_unreachable)
		add_objects_in_unpacked_packs(&revs);
	if (pack_loose_unreachable)
		add_unreachable_loose_objects();
	if (unpack_unreachable)
		loosen_unused_packed_objects(&revs);

	sha1_array_clear(&recent_objects);
}

static int option_parse_index_version(const struct option *opt,
				      const char *arg, int unset)
{
	char *c;
	const char *val = arg;
	pack_idx_opts.version = strtoul(val, &c, 10);
	if (pack_idx_opts.version > 2)
		die(_("unsupported index version %s"), val);
	if (*c == ',' && c[1])
		pack_idx_opts.off32_limit = strtoul(c+1, &c, 0);
	if (*c || pack_idx_opts.off32_limit & 0x80000000)
		die(_("bad index version '%s'"), val);
	return 0;
}

static int option_parse_unpack_unreachable(const struct option *opt,
					   const char *arg, int unset)
{
	if (unset) {
		unpack_unreachable = 0;
		unpack_unreachable_expiration = 0;
	}
	else {
		unpack_unreachable = 1;
		if (arg)
			unpack_unreachable_expiration = approxidate(arg);
	}
	return 0;
}

int cmd_pack_objects(int argc, const char **argv, const char *prefix)
{
	int use_internal_rev_list = 0;
	int thin = 0;
	int shallow = 0;
	int all_progress_implied = 0;
	struct argv_array rp = ARGV_ARRAY_INIT;
	int rev_list_unpacked = 0, rev_list_all = 0, rev_list_reflog = 0;
	int rev_list_index = 0;
	struct option pack_objects_options[] = {
		OPT_SET_INT('q', "quiet", &progress,
			    N_("do not show progress meter"), 0),
		OPT_SET_INT(0, "progress", &progress,
			    N_("show progress meter"), 1),
		OPT_SET_INT(0, "all-progress", &progress,
			    N_("show progress meter during object writing phase"), 2),
		OPT_BOOL(0, "all-progress-implied",
			 &all_progress_implied,
			 N_("similar to --all-progress when progress meter is shown")),
		{ OPTION_CALLBACK, 0, "index-version", NULL, N_("version[,offset]"),
		  N_("write the pack index file in the specified idx format version"),
		  0, option_parse_index_version },
		OPT_MAGNITUDE(0, "max-pack-size", &pack_size_limit,
			      N_("maximum size of each output pack file")),
		OPT_BOOL(0, "local", &local,
			 N_("ignore borrowed objects from alternate object store")),
		OPT_BOOL(0, "incremental", &incremental,
objects")),2723 OPT_INTEGER(0, "window", &window,2724 N_("limit pack window by objects")),2725 OPT_MAGNITUDE(0, "window-memory", &window_memory_limit,2726 N_("limit pack window by memory in addition to object limit")),2727 OPT_INTEGER(0, "depth", &depth,2728 N_("maximum length of delta chain allowed in the resulting pack")),2729 OPT_BOOL(0, "reuse-delta", &reuse_delta,2730 N_("reuse existing deltas")),2731 OPT_BOOL(0, "reuse-object", &reuse_object,2732 N_("reuse existing objects")),2733 OPT_BOOL(0, "delta-base-offset", &allow_ofs_delta,2734 N_("use OFS_DELTA objects")),2735 OPT_INTEGER(0, "threads", &delta_search_threads,2736 N_("use threads when searching for best delta matches")),2737 OPT_BOOL(0, "non-empty", &non_empty,2738 N_("do not create an empty pack output")),2739 OPT_BOOL(0, "revs", &use_internal_rev_list,2740 N_("read revision arguments from standard input")),2741 { OPTION_SET_INT, 0, "unpacked", &rev_list_unpacked, NULL,2742 N_("limit the objects to those that are not yet packed"),2743 PARSE_OPT_NOARG | PARSE_OPT_NONEG, NULL, 1 },2744 { OPTION_SET_INT, 0, "all", &rev_list_all, NULL,2745 N_("include objects reachable from any reference"),2746 PARSE_OPT_NOARG | PARSE_OPT_NONEG, NULL, 1 },2747 { OPTION_SET_INT, 0, "reflog", &rev_list_reflog, NULL,2748 N_("include objects referred by reflog entries"),2749 PARSE_OPT_NOARG | PARSE_OPT_NONEG, NULL, 1 },2750 { OPTION_SET_INT, 0, "indexed-objects", &rev_list_index, NULL,2751 N_("include objects referred to by the index"),2752 PARSE_OPT_NOARG | PARSE_OPT_NONEG, NULL, 1 },2753 OPT_BOOL(0, "stdout", &pack_to_stdout,2754 N_("output pack to stdout")),2755 OPT_BOOL(0, "include-tag", &include_tag,2756 N_("include tag objects that refer to objects to be packed")),2757 OPT_BOOL(0, "keep-unreachable", &keep_unreachable,2758 N_("keep unreachable objects")),2759 OPT_BOOL(0, "pack-loose-unreachable", &pack_loose_unreachable,2760 N_("pack loose unreachable objects")),2761 { OPTION_CALLBACK, 0, "unpack-unreachable", NULL, N_("time"),2762 N_("unpack unreachable objects newer than <time>"),2763 PARSE_OPT_OPTARG, option_parse_unpack_unreachable },2764 OPT_BOOL(0, "thin", &thin,2765 N_("create thin packs")),2766 OPT_BOOL(0, "shallow", &shallow,2767 N_("create packs suitable for shallow fetches")),2768 OPT_BOOL(0, "honor-pack-keep", &ignore_packed_keep,2769 N_("ignore packs that have companion .keep file")),2770 OPT_INTEGER(0, "compression", &pack_compression_level,2771 N_("pack compression level")),2772 OPT_SET_INT(0, "keep-true-parents", &grafts_replace_parents,2773 N_("do not hide commits by grafts"), 0),2774 OPT_BOOL(0, "use-bitmap-index", &use_bitmap_index,2775 N_("use a bitmap index if available to speed up counting objects")),2776 OPT_BOOL(0, "write-bitmap-index", &write_bitmap_index,2777 N_("write a bitmap index together with the pack index")),2778 OPT_END(),2779 };27802781 check_replace_refs = 0;27822783 reset_pack_idx_option(&pack_idx_opts);2784 git_config(git_pack_config, NULL);2785 if (!pack_compression_seen && core_compression_seen)2786 pack_compression_level = core_compression_level;27872788 progress = isatty(2);2789 argc = parse_options(argc, argv, prefix, pack_objects_options,2790 pack_usage, 0);27912792 if (argc) {2793 base_name = argv[0];2794 argc--;2795 }2796 if (pack_to_stdout != !base_name || argc)2797 usage_with_options(pack_usage, pack_objects_options);27982799 argv_array_push(&rp, "pack-objects");2800 if (thin) {2801 use_internal_rev_list = 1;2802 argv_array_push(&rp, shallow2803 ? 
"--objects-edge-aggressive"2804 : "--objects-edge");2805 } else2806 argv_array_push(&rp, "--objects");28072808 if (rev_list_all) {2809 use_internal_rev_list = 1;2810 argv_array_push(&rp, "--all");2811 }2812 if (rev_list_reflog) {2813 use_internal_rev_list = 1;2814 argv_array_push(&rp, "--reflog");2815 }2816 if (rev_list_index) {2817 use_internal_rev_list = 1;2818 argv_array_push(&rp, "--indexed-objects");2819 }2820 if (rev_list_unpacked) {2821 use_internal_rev_list = 1;2822 argv_array_push(&rp, "--unpacked");2823 }28242825 if (!reuse_object)2826 reuse_delta = 0;2827 if (pack_compression_level == -1)2828 pack_compression_level = Z_DEFAULT_COMPRESSION;2829 else if (pack_compression_level < 0 || pack_compression_level > Z_BEST_COMPRESSION)2830 die("bad pack compression level %d", pack_compression_level);28312832 if (!delta_search_threads) /* --threads=0 means autodetect */2833 delta_search_threads = online_cpus();28342835#ifdef NO_PTHREADS2836 if (delta_search_threads != 1)2837 warning("no threads support, ignoring --threads");2838#endif2839 if (!pack_to_stdout && !pack_size_limit)2840 pack_size_limit = pack_size_limit_cfg;2841 if (pack_to_stdout && pack_size_limit)2842 die("--max-pack-size cannot be used to build a pack for transfer.");2843 if (pack_size_limit && pack_size_limit < 1024*1024) {2844 warning("minimum pack size limit is 1 MiB");2845 pack_size_limit = 1024*1024;2846 }28472848 if (!pack_to_stdout && thin)2849 die("--thin cannot be used to build an indexable pack.");28502851 if (keep_unreachable && unpack_unreachable)2852 die("--keep-unreachable and --unpack-unreachable are incompatible.");2853 if (!rev_list_all || !rev_list_reflog || !rev_list_index)2854 unpack_unreachable_expiration = 0;28552856 /*2857 * "soft" reasons not to use bitmaps - for on-disk repack by default we want2858 *2859 * - to produce good pack (with bitmap index not-yet-packed objects are2860 * packed in suboptimal order).2861 *2862 * - to use more robust pack-generation codepath (avoiding possible2863 * bugs in bitmap code and possible bitmap index corruption).2864 */2865 if (!pack_to_stdout)2866 use_bitmap_index_default = 0;28672868 if (use_bitmap_index < 0)2869 use_bitmap_index = use_bitmap_index_default;28702871 /* "hard" reasons not to use bitmaps; these just won't work at all */2872 if (!use_internal_rev_list || (!pack_to_stdout && write_bitmap_index) || is_repository_shallow())2873 use_bitmap_index = 0;28742875 if (pack_to_stdout || !rev_list_all)2876 write_bitmap_index = 0;28772878 if (progress && all_progress_implied)2879 progress = 2;28802881 prepare_packed_git();2882 if (ignore_packed_keep) {2883 struct packed_git *p;2884 for (p = packed_git; p; p = p->next)2885 if (p->pack_local && p->pack_keep)2886 break;2887 if (!p) /* no keep-able packs found */2888 ignore_packed_keep = 0;2889 }2890 if (local) {2891 /*2892 * unlike ignore_packed_keep above, we do not want to2893 * unset "local" based on looking at packs, as it2894 * also covers non-local objects2895 */2896 struct packed_git *p;2897 for (p = packed_git; p; p = p->next) {2898 if (!p->pack_local) {2899 have_non_local_packs = 1;2900 break;2901 }2902 }2903 }29042905 if (progress)2906 progress_state = start_progress(_("Counting objects"), 0);2907 if (!use_internal_rev_list)2908 read_object_list_from_stdin();2909 else {2910 get_object_list(rp.argc, rp.argv);2911 argv_array_clear(&rp);2912 }2913 cleanup_preferred_base();2914 if (include_tag && nr_result)2915 for_each_ref(add_ref_tag, NULL);2916 stop_progress(&progress_state);29172918 if (non_empty && 
	if (non_empty && !nr_result)
		return 0;
	if (nr_result)
		prepare_pack(window, depth);
	write_pack_file();
	if (progress)
		fprintf(stderr, "Total %"PRIu32" (delta %"PRIu32"),"
			" reused %"PRIu32" (delta %"PRIu32")\n",
			written, written_delta, reused, reused_delta);
	return 0;
}