#include "builtin.h"
#include "cache.h"
#include "config.h"
#include "attr.h"
#include "object.h"
#include "blob.h"
#include "commit.h"
#include "tag.h"
#include "tree.h"
#include "delta.h"
#include "pack.h"
#include "pack-revindex.h"
#include "csum-file.h"
#include "tree-walk.h"
#include "diff.h"
#include "revision.h"
#include "list-objects.h"
#include "pack-objects.h"
#include "progress.h"
#include "refs.h"
#include "streaming.h"
#include "thread-utils.h"
#include "pack-bitmap.h"
#include "reachable.h"
#include "sha1-array.h"
#include "argv-array.h"
#include "mru.h"

static const char *pack_usage[] = {
	N_("git pack-objects --stdout [<options>...] [< <ref-list> | < <object-list>]"),
	N_("git pack-objects [<options>...] <base-name> [< <ref-list> | < <object-list>]"),
	NULL
};

/*
 * Objects we are going to pack are collected in the `to_pack` structure.
 * It contains an array (dynamically expanded) of the object data, and a map
 * that can resolve SHA1s to their position in the array.
 */
static struct packing_data to_pack;

static struct pack_idx_entry **written_list;
static uint32_t nr_result, nr_written;

static int non_empty;
static int reuse_delta = 1, reuse_object = 1;
static int keep_unreachable, unpack_unreachable, include_tag;
static unsigned long unpack_unreachable_expiration;
static int pack_loose_unreachable;
static int local;
static int have_non_local_packs;
static int incremental;
static int ignore_packed_keep;
static int allow_ofs_delta;
static struct pack_idx_option pack_idx_opts;
static const char *base_name;
static int progress = 1;
static int window = 10;
static unsigned long pack_size_limit;
static int depth = 50;
static int delta_search_threads;
static int pack_to_stdout;
static int num_preferred_base;
static struct progress *progress_state;

static struct packed_git *reuse_packfile;
static uint32_t reuse_packfile_objects;
static off_t reuse_packfile_offset;

static int use_bitmap_index_default = 1;
static int use_bitmap_index = -1;
static int write_bitmap_index;
static uint16_t write_bitmap_options;

static unsigned long delta_cache_size = 0;
static unsigned long max_delta_cache_size = 256 * 1024 * 1024;
static unsigned long cache_max_small_delta_size = 1000;

static unsigned long window_memory_limit = 0;

/*
 * stats
 */
static uint32_t written, written_delta;
static uint32_t reused, reused_delta;

/*
 * Indexed commits
 */
static struct commit **indexed_commits;
static unsigned int indexed_commits_nr;
static unsigned int indexed_commits_alloc;

static void index_commit_for_bitmap(struct commit *commit)
{
	if (indexed_commits_nr >= indexed_commits_alloc) {
		indexed_commits_alloc = (indexed_commits_alloc + 32) * 2;
		REALLOC_ARRAY(indexed_commits, indexed_commits_alloc);
	}

	indexed_commits[indexed_commits_nr++] = commit;
}

static void *get_delta(struct object_entry *entry)
{
	unsigned long size, base_size, delta_size;
	void *buf, *base_buf, *delta_buf;
	enum object_type type;

	buf = read_sha1_file(entry->idx.sha1, &type, &size);
	if (!buf)
		die("unable to read %s", sha1_to_hex(entry->idx.sha1));
	base_buf = read_sha1_file(entry->delta->idx.sha1, &type, &base_size);
	if (!base_buf)
		die("unable to read %s", sha1_to_hex(entry->delta->idx.sha1));
	delta_buf = diff_delta(base_buf, base_size,
			       buf, size, &delta_size, 0);
	if (!delta_buf || delta_size != entry->delta_size)
		die("delta size changed");
	free(buf);
	free(base_buf);
	return delta_buf;
}

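/*
 * Note that do_compress() takes over the buffer handed in via *pptr:
 * it deflates the contents into a freshly allocated buffer, frees the
 * original, stores the new buffer back through *pptr and returns the
 * compressed size.
 */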
static unsigned long do_compress(void **pptr, unsigned long size)
{
	git_zstream stream;
	void *in, *out;
	unsigned long maxsize;

	git_deflate_init(&stream, pack_compression_level);
	maxsize = git_deflate_bound(&stream, size);

	in = *pptr;
	out = xmalloc(maxsize);
	*pptr = out;

	stream.next_in = in;
	stream.avail_in = size;
	stream.next_out = out;
	stream.avail_out = maxsize;
	while (git_deflate(&stream, Z_FINISH) == Z_OK)
		; /* nothing */
	git_deflate_end(&stream);

	free(in);
	return stream.total_out;
}

static unsigned long write_large_blob_data(struct git_istream *st, struct sha1file *f,
					   const unsigned char *sha1)
{
	git_zstream stream;
	unsigned char ibuf[1024 * 16];
	unsigned char obuf[1024 * 16];
	unsigned long olen = 0;

	git_deflate_init(&stream, pack_compression_level);

	for (;;) {
		ssize_t readlen;
		int zret = Z_OK;
		readlen = read_istream(st, ibuf, sizeof(ibuf));
		if (readlen == -1)
			die(_("unable to read %s"), sha1_to_hex(sha1));

		stream.next_in = ibuf;
		stream.avail_in = readlen;
		while ((stream.avail_in || readlen == 0) &&
		       (zret == Z_OK || zret == Z_BUF_ERROR)) {
			stream.next_out = obuf;
			stream.avail_out = sizeof(obuf);
			zret = git_deflate(&stream, readlen ? 0 : Z_FINISH);
			sha1write(f, obuf, stream.next_out - obuf);
			olen += stream.next_out - obuf;
		}
		if (stream.avail_in)
			die(_("deflate error (%d)"), zret);
		if (readlen == 0) {
			if (zret != Z_STREAM_END)
				die(_("deflate error (%d)"), zret);
			break;
		}
	}
	git_deflate_end(&stream);
	return olen;
}

/*
 * we are going to reuse the existing object data as is.  make
 * sure it is not corrupt.
 */
static int check_pack_inflate(struct packed_git *p,
		struct pack_window **w_curs,
		off_t offset,
		off_t len,
		unsigned long expect)
{
	git_zstream stream;
	unsigned char fakebuf[4096], *in;
	int st;

	memset(&stream, 0, sizeof(stream));
	git_inflate_init(&stream);
	do {
		in = use_pack(p, w_curs, offset, &stream.avail_in);
		stream.next_in = in;
		stream.next_out = fakebuf;
		stream.avail_out = sizeof(fakebuf);
		st = git_inflate(&stream, Z_FINISH);
		offset += stream.next_in - in;
	} while (st == Z_OK || st == Z_BUF_ERROR);
	git_inflate_end(&stream);
	return (st == Z_STREAM_END &&
		stream.total_out == expect &&
		stream.total_in == len) ? 0 : -1;
}

static void copy_pack_data(struct sha1file *f,
		struct packed_git *p,
		struct pack_window **w_curs,
		off_t offset,
		off_t len)
{
	unsigned char *in;
	unsigned long avail;

	while (len) {
		in = use_pack(p, w_curs, offset, &avail);
		if (avail > len)
			avail = (unsigned long)len;
		sha1write(f, in, avail);
		offset += avail;
		len -= avail;
	}
}

/* Return 0 if we will bust the pack-size limit */
static unsigned long write_no_reuse_object(struct sha1file *f, struct object_entry *entry,
					   unsigned long limit, int usable_delta)
{
	unsigned long size, datalen;
	unsigned char header[MAX_PACK_OBJECT_HEADER],
		      dheader[MAX_PACK_OBJECT_HEADER];
	unsigned hdrlen;
	enum object_type type;
	void *buf;
	struct git_istream *st = NULL;

	if (!usable_delta) {
		if (entry->type == OBJ_BLOB &&
		    entry->size > big_file_threshold &&
		    (st = open_istream(entry->idx.sha1, &type, &size, NULL)) != NULL)
			buf = NULL;
		else {
			buf = read_sha1_file(entry->idx.sha1, &type, &size);
			if (!buf)
				die(_("unable to read %s"), sha1_to_hex(entry->idx.sha1));
		}
		/*
		 * make sure no cached delta data remains from a
		 * previous attempt before a pack split occurred.
		 */
		free(entry->delta_data);
		entry->delta_data = NULL;
		entry->z_delta_size = 0;
	} else if (entry->delta_data) {
		size = entry->delta_size;
		buf = entry->delta_data;
		entry->delta_data = NULL;
		type = (allow_ofs_delta && entry->delta->idx.offset) ?
			OBJ_OFS_DELTA : OBJ_REF_DELTA;
	} else {
		buf = get_delta(entry);
		size = entry->delta_size;
		type = (allow_ofs_delta && entry->delta->idx.offset) ?
			OBJ_OFS_DELTA : OBJ_REF_DELTA;
	}

	if (st)	/* large blob case, just assume we don't compress well */
		datalen = size;
	else if (entry->z_delta_size)
		datalen = entry->z_delta_size;
	else
		datalen = do_compress(&buf, size);

	/*
	 * The object header is a byte of 'type' followed by zero or
	 * more bytes of length.
	 */
	hdrlen = encode_in_pack_object_header(header, sizeof(header),
					      type, size);

	if (type == OBJ_OFS_DELTA) {
		/*
		 * Deltas with relative base contain an additional
		 * encoding of the relative offset for the delta
		 * base from this object's position in the pack.
		 */
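		/*
		 * The offset is stored as a big-endian base-128 varint,
		 * most significant group first, with the high bit set on
		 * every byte but the last.  Each continuation byte also
		 * implicitly adds 2^7 (then 2^7 + 2^14, and so on) to the
		 * value, which is what the "--ofs" below accounts for;
		 * an offset of 128, for example, is written as the two
		 * bytes 0x80 0x00.
		 */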
		off_t ofs = entry->idx.offset - entry->delta->idx.offset;
		unsigned pos = sizeof(dheader) - 1;
		dheader[pos] = ofs & 127;
		while (ofs >>= 7)
			dheader[--pos] = 128 | (--ofs & 127);
		if (limit && hdrlen + sizeof(dheader) - pos + datalen + 20 >= limit) {
			if (st)
				close_istream(st);
			free(buf);
			return 0;
		}
		sha1write(f, header, hdrlen);
		sha1write(f, dheader + pos, sizeof(dheader) - pos);
		hdrlen += sizeof(dheader) - pos;
	} else if (type == OBJ_REF_DELTA) {
		/*
		 * Deltas with a base reference contain
		 * an additional 20 bytes for the base sha1.
		 */
		if (limit && hdrlen + 20 + datalen + 20 >= limit) {
			if (st)
				close_istream(st);
			free(buf);
			return 0;
		}
		sha1write(f, header, hdrlen);
		sha1write(f, entry->delta->idx.sha1, 20);
		hdrlen += 20;
	} else {
		if (limit && hdrlen + datalen + 20 >= limit) {
			if (st)
				close_istream(st);
			free(buf);
			return 0;
		}
		sha1write(f, header, hdrlen);
	}
	if (st) {
		datalen = write_large_blob_data(st, f, entry->idx.sha1);
		close_istream(st);
	} else {
		sha1write(f, buf, datalen);
		free(buf);
	}

	return hdrlen + datalen;
}

/* Return 0 if we will bust the pack-size limit */
static off_t write_reuse_object(struct sha1file *f, struct object_entry *entry,
				unsigned long limit, int usable_delta)
{
	struct packed_git *p = entry->in_pack;
	struct pack_window *w_curs = NULL;
	struct revindex_entry *revidx;
	off_t offset;
	enum object_type type = entry->type;
	off_t datalen;
	unsigned char header[MAX_PACK_OBJECT_HEADER],
		      dheader[MAX_PACK_OBJECT_HEADER];
	unsigned hdrlen;

	if (entry->delta)
		type = (allow_ofs_delta && entry->delta->idx.offset) ?
			OBJ_OFS_DELTA : OBJ_REF_DELTA;
	hdrlen = encode_in_pack_object_header(header, sizeof(header),
					      type, entry->size);

	offset = entry->in_pack_offset;
	revidx = find_pack_revindex(p, offset);
	datalen = revidx[1].offset - offset;
	if (!pack_to_stdout && p->index_version > 1 &&
	    check_pack_crc(p, &w_curs, offset, datalen, revidx->nr)) {
		error("bad packed object CRC for %s", sha1_to_hex(entry->idx.sha1));
		unuse_pack(&w_curs);
		return write_no_reuse_object(f, entry, limit, usable_delta);
	}

	offset += entry->in_pack_header_size;
	datalen -= entry->in_pack_header_size;

	if (!pack_to_stdout && p->index_version == 1 &&
	    check_pack_inflate(p, &w_curs, offset, datalen, entry->size)) {
		error("corrupt packed object for %s", sha1_to_hex(entry->idx.sha1));
		unuse_pack(&w_curs);
		return write_no_reuse_object(f, entry, limit, usable_delta);
	}

	if (type == OBJ_OFS_DELTA) {
		off_t ofs = entry->idx.offset - entry->delta->idx.offset;
		unsigned pos = sizeof(dheader) - 1;
		dheader[pos] = ofs & 127;
		while (ofs >>= 7)
			dheader[--pos] = 128 | (--ofs & 127);
		if (limit && hdrlen + sizeof(dheader) - pos + datalen + 20 >= limit) {
			unuse_pack(&w_curs);
			return 0;
		}
		sha1write(f, header, hdrlen);
		sha1write(f, dheader + pos, sizeof(dheader) - pos);
		hdrlen += sizeof(dheader) - pos;
		reused_delta++;
	} else if (type == OBJ_REF_DELTA) {
		if (limit && hdrlen + 20 + datalen + 20 >= limit) {
			unuse_pack(&w_curs);
			return 0;
		}
		sha1write(f, header, hdrlen);
		sha1write(f, entry->delta->idx.sha1, 20);
		hdrlen += 20;
		reused_delta++;
	} else {
		if (limit && hdrlen + datalen + 20 >= limit) {
			unuse_pack(&w_curs);
			return 0;
		}
		sha1write(f, header, hdrlen);
	}
	copy_pack_data(f, p, &w_curs, offset, datalen);
	unuse_pack(&w_curs);
	reused++;
	return hdrlen + datalen;
}

/* Return 0 if we will bust the pack-size limit */
static off_t write_object(struct sha1file *f,
			  struct object_entry *entry,
			  off_t write_offset)
{
	unsigned long limit;
	off_t len;
	int usable_delta, to_reuse;

	if (!pack_to_stdout)
		crc32_begin(f);

	/* apply size limit if limited packsize and not first object */
	if (!pack_size_limit || !nr_written)
		limit = 0;
	else if (pack_size_limit <= write_offset)
		/*
		 * the earlier object did not fit the limit; avoid
		 * mistaking this with unlimited (i.e. limit = 0).
		 */
		limit = 1;
	else
		limit = pack_size_limit - write_offset;

	if (!entry->delta)
		usable_delta = 0;	/* no delta */
	else if (!pack_size_limit)
		usable_delta = 1;	/* unlimited packfile */
	else if (entry->delta->idx.offset == (off_t)-1)
		usable_delta = 0;	/* base was written to another pack */
	else if (entry->delta->idx.offset)
		usable_delta = 1;	/* base already exists in this pack */
	else
		usable_delta = 0;	/* base could end up in another pack */

	if (!reuse_object)
		to_reuse = 0;	/* explicit */
	else if (!entry->in_pack)
		to_reuse = 0;	/* can't reuse what we don't have */
	else if (entry->type == OBJ_REF_DELTA || entry->type == OBJ_OFS_DELTA)
				/* check_object() decided it for us ... */
		to_reuse = usable_delta;
				/* ... but pack split may override that */
	else if (entry->type != entry->in_pack_type)
		to_reuse = 0;	/* pack has delta which is unusable */
	else if (entry->delta)
		to_reuse = 0;	/* we want to pack afresh */
	else
		to_reuse = 1;	/* we have it in-pack undeltified,
				 * and we do not need to deltify it.
				 */

	if (!to_reuse)
		len = write_no_reuse_object(f, entry, limit, usable_delta);
	else
		len = write_reuse_object(f, entry, limit, usable_delta);
	if (!len)
		return 0;

	if (usable_delta)
		written_delta++;
	written++;
	if (!pack_to_stdout)
		entry->idx.crc32 = crc32_end(f);
	return len;
}

enum write_one_status {
	WRITE_ONE_SKIP = -1, /* already written */
	WRITE_ONE_BREAK = 0, /* writing this will bust the limit; not written */
	WRITE_ONE_WRITTEN = 1, /* normal */
	WRITE_ONE_RECURSIVE = 2 /* already scheduled to be written */
};

static enum write_one_status write_one(struct sha1file *f,
				       struct object_entry *e,
				       off_t *offset)
{
	off_t size;
	int recursing;

	/*
	 * we set offset to 1 (which is an impossible value) to mark
	 * the fact that this object is involved in "write its base
	 * first before writing a deltified object" recursion.
	 */
	recursing = (e->idx.offset == 1);
	if (recursing) {
		warning("recursive delta detected for object %s",
			sha1_to_hex(e->idx.sha1));
		return WRITE_ONE_RECURSIVE;
	} else if (e->idx.offset || e->preferred_base) {
		/* offset is non zero if object is written already. */
		return WRITE_ONE_SKIP;
	}

	/* if we are deltified, write out base object first. */
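	/*
	 * If the base itself cannot be written because it would bust the
	 * size limit (WRITE_ONE_BREAK below), reset our own offset marker
	 * and propagate the break so that both objects are retried in the
	 * next pack.
	 */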
	if (e->delta) {
		e->idx.offset = 1; /* now recurse */
		switch (write_one(f, e->delta, offset)) {
		case WRITE_ONE_RECURSIVE:
			/* we cannot depend on this one */
			e->delta = NULL;
			break;
		default:
			break;
		case WRITE_ONE_BREAK:
			e->idx.offset = recursing;
			return WRITE_ONE_BREAK;
		}
	}

	e->idx.offset = *offset;
	size = write_object(f, e, *offset);
	if (!size) {
		e->idx.offset = recursing;
		return WRITE_ONE_BREAK;
	}
	written_list[nr_written++] = &e->idx;

	/* make sure off_t is sufficiently large not to wrap */
	if (signed_add_overflows(*offset, size))
		die("pack too large for current definition of off_t");
	*offset += size;
	return WRITE_ONE_WRITTEN;
}

static int mark_tagged(const char *path, const struct object_id *oid, int flag,
		       void *cb_data)
{
	unsigned char peeled[20];
	struct object_entry *entry = packlist_find(&to_pack, oid->hash, NULL);

	if (entry)
		entry->tagged = 1;
	if (!peel_ref(path, peeled)) {
		entry = packlist_find(&to_pack, peeled, NULL);
		if (entry)
			entry->tagged = 1;
	}
	return 0;
}

static inline void add_to_write_order(struct object_entry **wo,
				      unsigned int *endp,
				      struct object_entry *e)
{
	if (e->filled)
		return;
	wo[(*endp)++] = e;
	e->filled = 1;
}

static void add_descendants_to_write_order(struct object_entry **wo,
					   unsigned int *endp,
					   struct object_entry *e)
{
	int add_to_order = 1;
	while (e) {
		if (add_to_order) {
			struct object_entry *s;
			/* add this node... */
			add_to_write_order(wo, endp, e);
			/* all its siblings... */
			for (s = e->delta_sibling; s; s = s->delta_sibling) {
				add_to_write_order(wo, endp, s);
			}
		}
		/* drop down a level to add left subtree nodes if possible */
		if (e->delta_child) {
			add_to_order = 1;
			e = e->delta_child;
		} else {
			add_to_order = 0;
			/* our sibling might have some children, it is next */
			if (e->delta_sibling) {
				e = e->delta_sibling;
				continue;
			}
			/* go back to our parent node */
			e = e->delta;
			while (e && !e->delta_sibling) {
				/* we're on the right side of a subtree, keep
				 * going up until we can go right again */
				e = e->delta;
			}
			if (!e) {
				/* done - we hit our original root node */
				return;
			}
			/* pass it off to sibling at this level */
			e = e->delta_sibling;
		}
	};
}

static void add_family_to_write_order(struct object_entry **wo,
				      unsigned int *endp,
				      struct object_entry *e)
{
	struct object_entry *root;

	for (root = e; root->delta; root = root->delta)
		; /* nothing */
	add_descendants_to_write_order(wo, endp, root);
}

static struct object_entry **compute_write_order(void)
{
	unsigned int i, wo_end, last_untagged;

	struct object_entry **wo;
	struct object_entry *objects = to_pack.objects;

	for (i = 0; i < to_pack.nr_objects; i++) {
		objects[i].tagged = 0;
		objects[i].filled = 0;
		objects[i].delta_child = NULL;
		objects[i].delta_sibling = NULL;
	}

	/*
	 * Fully connect delta_child/delta_sibling network.
	 * Make sure delta_sibling is sorted in the original
	 * recency order.
	 */
	for (i = to_pack.nr_objects; i > 0;) {
		struct object_entry *e = &objects[--i];
		if (!e->delta)
			continue;
		/* Mark me as the first child */
		e->delta_sibling = e->delta->delta_child;
		e->delta->delta_child = e;
	}

	/*
	 * Mark objects that are at the tip of tags.
	 */
	for_each_tag_ref(mark_tagged, NULL);

	/*
	 * Give the objects in the original recency order until
	 * we see a tagged tip.
	 */
	ALLOC_ARRAY(wo, to_pack.nr_objects);
	for (i = wo_end = 0; i < to_pack.nr_objects; i++) {
		if (objects[i].tagged)
			break;
		add_to_write_order(wo, &wo_end, &objects[i]);
	}
	last_untagged = i;

	/*
	 * Then fill all the tagged tips.
	 */
	for (; i < to_pack.nr_objects; i++) {
		if (objects[i].tagged)
			add_to_write_order(wo, &wo_end, &objects[i]);
	}

	/*
	 * And then all remaining commits and tags.
	 */
	for (i = last_untagged; i < to_pack.nr_objects; i++) {
		if (objects[i].type != OBJ_COMMIT &&
		    objects[i].type != OBJ_TAG)
			continue;
		add_to_write_order(wo, &wo_end, &objects[i]);
	}

	/*
	 * And then all the trees.
	 */
	for (i = last_untagged; i < to_pack.nr_objects; i++) {
		if (objects[i].type != OBJ_TREE)
			continue;
		add_to_write_order(wo, &wo_end, &objects[i]);
	}

	/*
	 * Finally all the rest in really tight order
	 */
	for (i = last_untagged; i < to_pack.nr_objects; i++) {
		if (!objects[i].filled)
			add_family_to_write_order(wo, &wo_end, &objects[i]);
	}

	if (wo_end != to_pack.nr_objects)
		die("ordered %u objects, expected %"PRIu32, wo_end, to_pack.nr_objects);

	return wo;
}

static off_t write_reused_pack(struct sha1file *f)
{
	unsigned char buffer[8192];
	off_t to_write, total;
	int fd;

	if (!is_pack_valid(reuse_packfile))
		die("packfile is invalid: %s", reuse_packfile->pack_name);

	fd = git_open(reuse_packfile->pack_name);
	if (fd < 0)
		die_errno("unable to open packfile for reuse: %s",
			  reuse_packfile->pack_name);

	if (lseek(fd, sizeof(struct pack_header), SEEK_SET) == -1)
		die_errno("unable to seek in reused packfile");

	if (reuse_packfile_offset < 0)
		reuse_packfile_offset = reuse_packfile->pack_size - 20;

	total = to_write = reuse_packfile_offset - sizeof(struct pack_header);

	while (to_write) {
		int read_pack = xread(fd, buffer, sizeof(buffer));

		if (read_pack <= 0)
			die_errno("unable to read from reused packfile");

		if (read_pack > to_write)
			read_pack = to_write;

		sha1write(f, buffer, read_pack);
		to_write -= read_pack;

		/*
		 * We don't know the actual number of objects written,
		 * only how many bytes written, how many bytes total, and
		 * how many objects total. So we can fake it by pretending all
		 * objects we are writing are the same size. This gives us a
		 * smooth progress meter, and at the end it matches the true
		 * answer.
		 */
		written = reuse_packfile_objects *
				(((double)(total - to_write)) / total);
		display_progress(progress_state, written);
	}

	close(fd);
	written = reuse_packfile_objects;
	display_progress(progress_state, written);
	return reuse_packfile_offset - sizeof(struct pack_header);
}

static const char no_split_warning[] = N_(
"disabling bitmap writing, packs are split due to pack.packSizeLimit"
);

static void write_pack_file(void)
{
	uint32_t i = 0, j;
	struct sha1file *f;
	off_t offset;
	uint32_t nr_remaining = nr_result;
	time_t last_mtime = 0;
	struct object_entry **write_order;

	if (progress > pack_to_stdout)
		progress_state = start_progress(_("Writing objects"), nr_result);
	ALLOC_ARRAY(written_list, to_pack.nr_objects);
	write_order = compute_write_order();

	do {
		unsigned char sha1[20];
		char *pack_tmp_name = NULL;

		if (pack_to_stdout)
			f = sha1fd_throughput(1, "<stdout>", progress_state);
		else
			f = create_tmp_packfile(&pack_tmp_name);

		offset = write_pack_header(f, nr_remaining);

		if (reuse_packfile) {
			off_t packfile_size;
			assert(pack_to_stdout);

			packfile_size = write_reused_pack(f);
			offset += packfile_size;
		}

		nr_written = 0;
		for (; i < to_pack.nr_objects; i++) {
			struct object_entry *e = write_order[i];
			if (write_one(f, e, &offset) == WRITE_ONE_BREAK)
				break;
			display_progress(progress_state, written);
		}

		/*
		 * Did we write the wrong # entries in the header?
		 * If so, rewrite it like in fast-import
		 */
		if (pack_to_stdout) {
			sha1close(f, sha1, CSUM_CLOSE);
		} else if (nr_written == nr_remaining) {
			sha1close(f, sha1, CSUM_FSYNC);
		} else {
			int fd = sha1close(f, sha1, 0);
			fixup_pack_header_footer(fd, sha1, pack_tmp_name,
						 nr_written, sha1, offset);
			close(fd);
			if (write_bitmap_index) {
				warning(_(no_split_warning));
				write_bitmap_index = 0;
			}
		}

		if (!pack_to_stdout) {
			struct stat st;
			struct strbuf tmpname = STRBUF_INIT;

			/*
			 * Packs are runtime accessed in their mtime
			 * order since newer packs are more likely to contain
			 * younger objects.  So if we are creating multiple
			 * packs then we should modify the mtime of later ones
			 * to preserve this property.
			 */
			if (stat(pack_tmp_name, &st) < 0) {
				warning_errno("failed to stat %s", pack_tmp_name);
			} else if (!last_mtime) {
				last_mtime = st.st_mtime;
			} else {
				struct utimbuf utb;
				utb.actime = st.st_atime;
				utb.modtime = --last_mtime;
				if (utime(pack_tmp_name, &utb) < 0)
					warning_errno("failed utime() on %s", pack_tmp_name);
			}

			strbuf_addf(&tmpname, "%s-", base_name);

			if (write_bitmap_index) {
				bitmap_writer_set_checksum(sha1);
				bitmap_writer_build_type_index(written_list, nr_written);
			}

			finish_tmp_packfile(&tmpname, pack_tmp_name,
					    written_list, nr_written,
					    &pack_idx_opts, sha1);

			if (write_bitmap_index) {
				strbuf_addf(&tmpname, "%s.bitmap", sha1_to_hex(sha1));

				stop_progress(&progress_state);

				bitmap_writer_show_progress(progress);
				bitmap_writer_reuse_bitmaps(&to_pack);
				bitmap_writer_select_commits(indexed_commits, indexed_commits_nr, -1);
				bitmap_writer_build(&to_pack);
				bitmap_writer_finish(written_list, nr_written,
						     tmpname.buf, write_bitmap_options);
				write_bitmap_index = 0;
			}

			strbuf_release(&tmpname);
			free(pack_tmp_name);
			puts(sha1_to_hex(sha1));
		}

		/* mark written objects as written to previous pack */
		for (j = 0; j < nr_written; j++) {
			written_list[j]->offset = (off_t)-1;
		}
		nr_remaining -= nr_written;
	} while (nr_remaining && i < to_pack.nr_objects);

	free(written_list);
	free(write_order);
	stop_progress(&progress_state);
	if (written != nr_result)
		die("wrote %"PRIu32" objects while expecting %"PRIu32,
		    written, nr_result);
}

static int no_try_delta(const char *path)
{
	static struct attr_check *check;

	if (!check)
		check = attr_check_initl("delta", NULL);
	if (git_check_attr(path, check))
		return 0;
	if (ATTR_FALSE(check->items[0].value))
		return 1;
	return 0;
}

/*
 * When adding an object, check whether we have already added it
 * to our packing list. If so, we can skip. However, if we are
 * being asked to exclude it, but the previous mention was to include
 * it, make sure to adjust its flags and tweak our numbers accordingly.
 *
 * As an optimization, we pass out the index position where we would have
 * found the item, since that saves us from having to look it up again a
 * few lines later when we want to add the new entry.
 */
static int have_duplicate_entry(const unsigned char *sha1,
				int exclude,
				uint32_t *index_pos)
{
	struct object_entry *entry;

	entry = packlist_find(&to_pack, sha1, index_pos);
	if (!entry)
		return 0;

	if (exclude) {
		if (!entry->preferred_base)
			nr_result--;
		entry->preferred_base = 1;
	}

	return 1;
}

static int want_found_object(int exclude, struct packed_git *p)
{
	if (exclude)
		return 1;
	if (incremental)
		return 0;

	/*
	 * When asked to do --local (do not include an object that appears in a
	 * pack we borrow from elsewhere) or --honor-pack-keep (do not include
	 * an object that appears in a pack marked with .keep), finding a pack
	 * that matches the criteria is sufficient for us to decide to omit it.
	 * However, even if this pack does not satisfy the criteria, we need to
	 * make sure no copy of this object appears in _any_ pack that would
	 * make us omit the object, so we need to check all the packs.
	 *
	 * We can however first check whether these options can possibly matter;
	 * if they do not matter we know we want the object in the generated pack.
	 * Otherwise, we signal "-1" at the end to tell the caller that we do
	 * not know either way, and it needs to check more packs.
	 */
	if (!ignore_packed_keep &&
	    (!local || !have_non_local_packs))
		return 1;

	if (local && !p->pack_local)
		return 0;
	if (ignore_packed_keep && p->pack_local && p->pack_keep)
		return 0;

	/* we don't know yet; keep looking for more packs */
	return -1;
}

/*
 * Check whether we want the object in the pack (e.g., we do not want
 * objects found in non-local stores if the "--local" option was used).
 *
 * If the caller already knows an existing pack it wants to take the object
 * from, that is passed in *found_pack and *found_offset; otherwise this
 * function finds if there is any pack that has the object and returns the pack
 * and its offset in these variables.
 */
static int want_object_in_pack(const unsigned char *sha1,
			       int exclude,
			       struct packed_git **found_pack,
			       off_t *found_offset)
{
	struct mru_entry *entry;
	int want;

	if (!exclude && local && has_loose_object_nonlocal(sha1))
		return 0;

	/*
	 * If we already know the pack object lives in, start checks from that
	 * pack - in the usual case when neither --local was given nor .keep files
	 * are present we will determine the answer right now.
	 */
	if (*found_pack) {
		want = want_found_object(exclude, *found_pack);
		if (want != -1)
			return want;
	}

	for (entry = packed_git_mru->head; entry; entry = entry->next) {
		struct packed_git *p = entry->item;
		off_t offset;

		if (p == *found_pack)
			offset = *found_offset;
		else
			offset = find_pack_entry_one(sha1, p);

		if (offset) {
			if (!*found_pack) {
				if (!is_pack_valid(p))
					continue;
				*found_offset = offset;
				*found_pack = p;
			}
			want = want_found_object(exclude, p);
			if (!exclude && want > 0)
				mru_mark(packed_git_mru, entry);
			if (want != -1)
				return want;
		}
	}

	return 1;
}

static void create_object_entry(const unsigned char *sha1,
				enum object_type type,
				uint32_t hash,
				int exclude,
				int no_try_delta,
				uint32_t index_pos,
				struct packed_git *found_pack,
				off_t found_offset)
{
	struct object_entry *entry;

	entry = packlist_alloc(&to_pack, sha1, index_pos);
	entry->hash = hash;
	if (type)
		entry->type = type;
	if (exclude)
		entry->preferred_base = 1;
	else
		nr_result++;
	if (found_pack) {
		entry->in_pack = found_pack;
		entry->in_pack_offset = found_offset;
	}

	entry->no_try_delta = no_try_delta;
}

static const char no_closure_warning[] = N_(
"disabling bitmap writing, as some objects are not being packed"
);

static int add_object_entry(const unsigned char *sha1, enum object_type type,
			    const char *name, int exclude)
{
	struct packed_git *found_pack = NULL;
	off_t found_offset = 0;
	uint32_t index_pos;

	if (have_duplicate_entry(sha1, exclude, &index_pos))
		return 0;

	if (!want_object_in_pack(sha1, exclude, &found_pack, &found_offset)) {
		/* The pack is missing an object, so it will not have closure */
		if (write_bitmap_index) {
			warning(_(no_closure_warning));
			write_bitmap_index = 0;
		}
		return 0;
	}

	create_object_entry(sha1, type, pack_name_hash(name),
			    exclude, name && no_try_delta(name),
			    index_pos, found_pack, found_offset);

	display_progress(progress_state, nr_result);
	return 1;
}

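/*
 * Bitmap-driven counterpart of add_object_entry(): the bitmap walk already
 * knows which pack and offset the object lives at and has no path name to
 * offer, so the precomputed name hash is used as-is and the object is never
 * added as a preferred base.
 */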
static int add_object_entry_from_bitmap(const unsigned char *sha1,
					enum object_type type,
					int flags, uint32_t name_hash,
					struct packed_git *pack, off_t offset)
{
	uint32_t index_pos;

	if (have_duplicate_entry(sha1, 0, &index_pos))
		return 0;

	if (!want_object_in_pack(sha1, 0, &pack, &offset))
		return 0;

	create_object_entry(sha1, type, name_hash, 0, 0, index_pos, pack, offset);

	display_progress(progress_state, nr_result);
	return 1;
}

struct pbase_tree_cache {
	unsigned char sha1[20];
	int ref;
	int temporary;
	void *tree_data;
	unsigned long tree_size;
};

static struct pbase_tree_cache *(pbase_tree_cache[256]);
static int pbase_tree_cache_ix(const unsigned char *sha1)
{
	return sha1[0] % ARRAY_SIZE(pbase_tree_cache);
}
static int pbase_tree_cache_ix_incr(int ix)
{
	return (ix+1) % ARRAY_SIZE(pbase_tree_cache);
}

static struct pbase_tree {
	struct pbase_tree *next;
	/* This is a phony "cache" entry; we are not
	 * going to evict it or find it through _get()
	 * mechanism -- this is for the toplevel node that
	 * would almost always change with any commit.
	 */
	struct pbase_tree_cache pcache;
} *pbase_tree;

static struct pbase_tree_cache *pbase_tree_get(const unsigned char *sha1)
{
	struct pbase_tree_cache *ent, *nent;
	void *data;
	unsigned long size;
	enum object_type type;
	int neigh;
	int my_ix = pbase_tree_cache_ix(sha1);
	int available_ix = -1;

	/* pbase-tree-cache acts as a limited hashtable.
	 * your object will be found at your index or within a few
	 * slots after that slot if it is cached.
	 */
	for (neigh = 0; neigh < 8; neigh++) {
		ent = pbase_tree_cache[my_ix];
		if (ent && !hashcmp(ent->sha1, sha1)) {
			ent->ref++;
			return ent;
		}
		else if (((available_ix < 0) && (!ent || !ent->ref)) ||
			 ((0 <= available_ix) &&
			  (!ent && pbase_tree_cache[available_ix])))
			available_ix = my_ix;
		if (!ent)
			break;
		my_ix = pbase_tree_cache_ix_incr(my_ix);
	}

	/* Did not find one.  Either we got a bogus request or
	 * we need to read and perhaps cache.
	 */
	data = read_sha1_file(sha1, &type, &size);
	if (!data)
		return NULL;
	if (type != OBJ_TREE) {
		free(data);
		return NULL;
	}

	/* We need to either cache or return a throwaway copy */

	if (available_ix < 0)
		ent = NULL;
	else {
		ent = pbase_tree_cache[available_ix];
		my_ix = available_ix;
	}

	if (!ent) {
		nent = xmalloc(sizeof(*nent));
		nent->temporary = (available_ix < 0);
	}
	else {
		/* evict and reuse */
		free(ent->tree_data);
		nent = ent;
	}
	hashcpy(nent->sha1, sha1);
	nent->tree_data = data;
	nent->tree_size = size;
	nent->ref = 1;
	if (!nent->temporary)
		pbase_tree_cache[my_ix] = nent;
	return nent;
}

static void pbase_tree_put(struct pbase_tree_cache *cache)
{
	if (!cache->temporary) {
		cache->ref--;
		return;
	}
	free(cache->tree_data);
	free(cache);
}

static int name_cmp_len(const char *name)
{
	int i;
	for (i = 0; name[i] && name[i] != '\n' && name[i] != '/'; i++)
		;
	return i;
}

static void add_pbase_object(struct tree_desc *tree,
			     const char *name,
			     int cmplen,
			     const char *fullname)
{
	struct name_entry entry;
	int cmp;

	while (tree_entry(tree,&entry)) {
		if (S_ISGITLINK(entry.mode))
			continue;
		cmp = tree_entry_len(&entry) != cmplen ? 1 :
		      memcmp(name, entry.path, cmplen);
		if (cmp > 0)
			continue;
		if (cmp < 0)
			return;
		if (name[cmplen] != '/') {
			add_object_entry(entry.oid->hash,
					 object_type(entry.mode),
					 fullname, 1);
			return;
		}
		if (S_ISDIR(entry.mode)) {
			struct tree_desc sub;
			struct pbase_tree_cache *tree;
			const char *down = name+cmplen+1;
			int downlen = name_cmp_len(down);

			tree = pbase_tree_get(entry.oid->hash);
			if (!tree)
				return;
			init_tree_desc(&sub, tree->tree_data, tree->tree_size);

			add_pbase_object(&sub, down, downlen, fullname);
			pbase_tree_put(tree);
		}
	}
}

static unsigned *done_pbase_paths;
static int done_pbase_paths_num;
static int done_pbase_paths_alloc;
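/*
 * done_pbase_paths[] is kept sorted by hash in descending order (hence the
 * inverted comparison in the binary search below); done_pbase_path_pos()
 * returns the index of an existing hash, or -pos-1 where pos is the
 * insertion point that keeps the array sorted.
 */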
static int done_pbase_path_pos(unsigned hash)
{
	int lo = 0;
	int hi = done_pbase_paths_num;
	while (lo < hi) {
		int mi = (hi + lo) / 2;
		if (done_pbase_paths[mi] == hash)
			return mi;
		if (done_pbase_paths[mi] < hash)
			hi = mi;
		else
			lo = mi + 1;
	}
	return -lo-1;
}

static int check_pbase_path(unsigned hash)
{
	int pos = (!done_pbase_paths) ? -1 : done_pbase_path_pos(hash);
	if (0 <= pos)
		return 1;
	pos = -pos - 1;
	ALLOC_GROW(done_pbase_paths,
		   done_pbase_paths_num + 1,
		   done_pbase_paths_alloc);
	done_pbase_paths_num++;
	if (pos < done_pbase_paths_num)
		memmove(done_pbase_paths + pos + 1,
			done_pbase_paths + pos,
			(done_pbase_paths_num - pos - 1) * sizeof(unsigned));
	done_pbase_paths[pos] = hash;
	return 0;
}

static void add_preferred_base_object(const char *name)
{
	struct pbase_tree *it;
	int cmplen;
	unsigned hash = pack_name_hash(name);

	if (!num_preferred_base || check_pbase_path(hash))
		return;

	cmplen = name_cmp_len(name);
	for (it = pbase_tree; it; it = it->next) {
		if (cmplen == 0) {
			add_object_entry(it->pcache.sha1, OBJ_TREE, NULL, 1);
		}
		else {
			struct tree_desc tree;
			init_tree_desc(&tree, it->pcache.tree_data, it->pcache.tree_size);
			add_pbase_object(&tree, name, cmplen, name);
		}
	}
}

static void add_preferred_base(unsigned char *sha1)
{
	struct pbase_tree *it;
	void *data;
	unsigned long size;
	unsigned char tree_sha1[20];

	if (window <= num_preferred_base++)
		return;

	data = read_object_with_reference(sha1, tree_type, &size, tree_sha1);
	if (!data)
		return;

	for (it = pbase_tree; it; it = it->next) {
		if (!hashcmp(it->pcache.sha1, tree_sha1)) {
			free(data);
			return;
		}
	}

	it = xcalloc(1, sizeof(*it));
	it->next = pbase_tree;
	pbase_tree = it;

	hashcpy(it->pcache.sha1, tree_sha1);
	it->pcache.tree_data = data;
	it->pcache.tree_size = size;
}

static void cleanup_preferred_base(void)
{
	struct pbase_tree *it;
	unsigned i;

	it = pbase_tree;
	pbase_tree = NULL;
	while (it) {
		struct pbase_tree *this = it;
		it = this->next;
		free(this->pcache.tree_data);
		free(this);
	}

	for (i = 0; i < ARRAY_SIZE(pbase_tree_cache); i++) {
		if (!pbase_tree_cache[i])
			continue;
		free(pbase_tree_cache[i]->tree_data);
		free(pbase_tree_cache[i]);
		pbase_tree_cache[i] = NULL;
	}

	free(done_pbase_paths);
	done_pbase_paths = NULL;
	done_pbase_paths_num = done_pbase_paths_alloc = 0;
}

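/*
 * For an object that already lives in some pack, decide whether its on-disk
 * representation (possibly a delta) can be reused and record its type, size
 * and in-pack header size.  Objects that are not packed, or whose pack data
 * cannot be used, fall back to sha1_object_info().
 */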
static void check_object(struct object_entry *entry)
{
	if (entry->in_pack) {
		struct packed_git *p = entry->in_pack;
		struct pack_window *w_curs = NULL;
		const unsigned char *base_ref = NULL;
		struct object_entry *base_entry;
		unsigned long used, used_0;
		unsigned long avail;
		off_t ofs;
		unsigned char *buf, c;

		buf = use_pack(p, &w_curs, entry->in_pack_offset, &avail);

		/*
		 * We want in_pack_type even if we do not reuse delta
		 * since non-delta representations could still be reused.
		 */
		used = unpack_object_header_buffer(buf, avail,
						   &entry->in_pack_type,
						   &entry->size);
		if (used == 0)
			goto give_up;

		/*
		 * Determine if this is a delta and if so whether we can
		 * reuse it or not.  Otherwise let's find out as cheaply as
		 * possible what the actual type and size for this object is.
		 */
		switch (entry->in_pack_type) {
		default:
			/* Not a delta hence we've already got all we need. */
			entry->type = entry->in_pack_type;
			entry->in_pack_header_size = used;
			if (entry->type < OBJ_COMMIT || entry->type > OBJ_BLOB)
				goto give_up;
			unuse_pack(&w_curs);
			return;
		case OBJ_REF_DELTA:
			if (reuse_delta && !entry->preferred_base)
				base_ref = use_pack(p, &w_curs,
						entry->in_pack_offset + used, NULL);
			entry->in_pack_header_size = used + 20;
			break;
		case OBJ_OFS_DELTA:
			buf = use_pack(p, &w_curs,
				       entry->in_pack_offset + used, NULL);
			used_0 = 0;
			c = buf[used_0++];
			ofs = c & 127;
			while (c & 128) {
				ofs += 1;
				if (!ofs || MSB(ofs, 7)) {
					error("delta base offset overflow in pack for %s",
					      sha1_to_hex(entry->idx.sha1));
					goto give_up;
				}
				c = buf[used_0++];
				ofs = (ofs << 7) + (c & 127);
			}
			ofs = entry->in_pack_offset - ofs;
			if (ofs <= 0 || ofs >= entry->in_pack_offset) {
				error("delta base offset out of bound for %s",
				      sha1_to_hex(entry->idx.sha1));
				goto give_up;
			}
			if (reuse_delta && !entry->preferred_base) {
				struct revindex_entry *revidx;
				revidx = find_pack_revindex(p, ofs);
				if (!revidx)
					goto give_up;
				base_ref = nth_packed_object_sha1(p, revidx->nr);
			}
			entry->in_pack_header_size = used + used_0;
			break;
		}

		if (base_ref && (base_entry = packlist_find(&to_pack, base_ref, NULL))) {
			/*
			 * If base_ref was set above that means we wish to
			 * reuse delta data, and we even found that base
			 * in the list of objects we want to pack. Goodie!
			 *
			 * Depth value does not matter - find_deltas() will
			 * never consider reused delta as the base object to
			 * deltify other objects against, in order to avoid
			 * circular deltas.
			 */
			entry->type = entry->in_pack_type;
			entry->delta = base_entry;
			entry->delta_size = entry->size;
			entry->delta_sibling = base_entry->delta_child;
			base_entry->delta_child = entry;
			unuse_pack(&w_curs);
			return;
		}

		if (entry->type) {
			/*
			 * This must be a delta and we already know what the
			 * final object type is.  Let's extract the actual
			 * object size from the delta header.
			 */
			entry->size = get_size_from_delta(p, &w_curs,
					entry->in_pack_offset + entry->in_pack_header_size);
			if (entry->size == 0)
				goto give_up;
			unuse_pack(&w_curs);
			return;
		}

		/*
		 * No choice but to fall back to the recursive delta walk
		 * with sha1_object_info() to find out about the object type
		 * at this point...
		 */
		give_up:
		unuse_pack(&w_curs);
	}

	entry->type = sha1_object_info(entry->idx.sha1, &entry->size);
	/*
	 * The error condition is checked in prepare_pack().  This is
	 * to permit a missing preferred base object to be ignored
	 * as a preferred base.  Doing so can result in a larger
	 * pack file, but the transfer will still take place.
	 */
}

static int pack_offset_sort(const void *_a, const void *_b)
{
	const struct object_entry *a = *(struct object_entry **)_a;
	const struct object_entry *b = *(struct object_entry **)_b;

	/* avoid filesystem thrashing with loose objects */
	if (!a->in_pack && !b->in_pack)
		return hashcmp(a->idx.sha1, b->idx.sha1);

	if (a->in_pack < b->in_pack)
		return -1;
	if (a->in_pack > b->in_pack)
		return 1;
	return a->in_pack_offset < b->in_pack_offset ? -1 :
	       (a->in_pack_offset > b->in_pack_offset);
}

/*
 * Drop an on-disk delta we were planning to reuse. Naively, this would
 * just involve blanking out the "delta" field, but we have to deal
 * with some extra book-keeping:
 *
 *   1. Removing ourselves from the delta_sibling linked list.
 *
 *   2. Updating our size/type to the non-delta representation. These were
 *      either not recorded initially (size) or overwritten with the delta type
 *      (type) when check_object() decided to reuse the delta.
 *
 *   3. Resetting our delta depth, as we are now a base object.
 */
static void drop_reused_delta(struct object_entry *entry)
{
	struct object_entry **p = &entry->delta->delta_child;
	struct object_info oi = OBJECT_INFO_INIT;

	while (*p) {
		if (*p == entry)
			*p = (*p)->delta_sibling;
		else
			p = &(*p)->delta_sibling;
	}
	entry->delta = NULL;
	entry->depth = 0;

	oi.sizep = &entry->size;
	oi.typep = &entry->type;
	if (packed_object_info(entry->in_pack, entry->in_pack_offset, &oi) < 0) {
		/*
		 * We failed to get the info from this pack for some reason;
		 * fall back to sha1_object_info, which may find another copy.
		 * And if that fails, the error will be recorded in entry->type
		 * and dealt with in prepare_pack().
		 */
		entry->type = sha1_object_info(entry->idx.sha1, &entry->size);
	}
}

/*
 * Follow the chain of deltas from this entry onward, throwing away any links
 * that cause us to hit a cycle (as determined by the DFS state flags in
 * the entries).
 *
 * We also detect too-long reused chains that would violate our --depth
 * limit.
 */
static void break_delta_chains(struct object_entry *entry)
{
	/*
	 * The actual depth of each object we will write is stored as an int,
	 * as it cannot exceed our int "depth" limit. But before we break
	 * any chains based on that limit, we may potentially go as deep as the
	 * number of objects, which is elsewhere bounded to a uint32_t.
	 */
	uint32_t total_depth;
	struct object_entry *cur, *next;

	for (cur = entry, total_depth = 0;
	     cur;
	     cur = cur->delta, total_depth++) {
		if (cur->dfs_state == DFS_DONE) {
			/*
			 * We've already seen this object and know it isn't
			 * part of a cycle. We do need to append its depth
			 * to our count.
			 */
			total_depth += cur->depth;
			break;
		}

		/*
		 * We break cycles before looping, so an ACTIVE state (or any
		 * other cruft which made its way into the state variable)
		 * is a bug.
		 */
		if (cur->dfs_state != DFS_NONE)
			die("BUG: confusing delta dfs state in first pass: %d",
			    cur->dfs_state);

		/*
		 * Now we know this is the first time we've seen the object. If
		 * it's not a delta, we're done traversing, but we'll mark it
		 * done to save time on future traversals.
		 */
		if (!cur->delta) {
			cur->dfs_state = DFS_DONE;
			break;
		}

		/*
		 * Mark ourselves as active and see if the next step causes
		 * us to cycle to another active object. It's important to do
		 * this _before_ we loop, because it impacts where we make the
		 * cut, and thus how our total_depth counter works.
		 * E.g., We may see a partial loop like:
		 *
		 *   A -> B -> C -> D -> B
		 *
		 * Cutting B->C breaks the cycle. But now the depth of A is
		 * only 1, and our total_depth counter is at 3. The size of the
		 * error is always one less than the size of the cycle we
		 * broke. Commits C and D were "lost" from A's chain.
		 *
		 * If we instead cut D->B, then the depth of A is correct at 3.
		 * We keep all commits in the chain that we examined.
		 */
		cur->dfs_state = DFS_ACTIVE;
		if (cur->delta->dfs_state == DFS_ACTIVE) {
			drop_reused_delta(cur);
			cur->dfs_state = DFS_DONE;
			break;
		}
	}

	/*
	 * And now that we've gone all the way to the bottom of the chain, we
	 * need to clear the active flags and set the depth fields as
	 * appropriate. Unlike the loop above, which can quit when it drops a
	 * delta, we need to keep going to look for more depth cuts. So we need
	 * an extra "next" pointer to keep going after we reset cur->delta.
	 */
	for (cur = entry; cur; cur = next) {
		next = cur->delta;

		/*
		 * We should have a chain of zero or more ACTIVE states down to
		 * a final DONE. We can quit after the DONE, because either it
		 * has no bases, or we've already handled them in a previous
		 * call.
		 */
		if (cur->dfs_state == DFS_DONE)
			break;
		else if (cur->dfs_state != DFS_ACTIVE)
			die("BUG: confusing delta dfs state in second pass: %d",
			    cur->dfs_state);

		/*
		 * If the total_depth is more than depth, then we need to snip
		 * the chain into two or more smaller chains that don't exceed
		 * the maximum depth. Most of the resulting chains will contain
		 * (depth + 1) entries (i.e., depth deltas plus one base), and
		 * the last chain (i.e., the one containing entry) will contain
		 * whatever entries are left over, namely
		 * (total_depth % (depth + 1)) of them.
		 *
		 * Since we are iterating towards decreasing depth, we need to
		 * decrement total_depth as we go, and we need to write to the
		 * entry what its final depth will be after all of the
		 * snipping. Since we're snipping into chains of length (depth
		 * + 1) entries, the final depth of an entry will be its
		 * original depth modulo (depth + 1). Any time we encounter an
		 * entry whose final depth is supposed to be zero, we snip it
		 * from its delta base, thereby making it so.
		 */
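		/*
		 * For example, with --depth=2 and a freshly seen reused chain
		 * A -> B -> C -> D -> E (A being the tip, E the base),
		 * total_depth starts at 4; A gets depth 4 % 3 = 1, B gets
		 * 3 % 3 = 0 and is therefore snipped from C, leaving the
		 * chains A -> B and C -> D -> E.
		 */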
		cur->depth = (total_depth--) % (depth + 1);
		if (!cur->depth)
			drop_reused_delta(cur);

		cur->dfs_state = DFS_DONE;
	}
}

static void get_object_details(void)
{
	uint32_t i;
	struct object_entry **sorted_by_offset;

	sorted_by_offset = xcalloc(to_pack.nr_objects, sizeof(struct object_entry *));
	for (i = 0; i < to_pack.nr_objects; i++)
		sorted_by_offset[i] = to_pack.objects + i;
	QSORT(sorted_by_offset, to_pack.nr_objects, pack_offset_sort);

	for (i = 0; i < to_pack.nr_objects; i++) {
		struct object_entry *entry = sorted_by_offset[i];
		check_object(entry);
		if (big_file_threshold < entry->size)
			entry->no_try_delta = 1;
	}

	/*
	 * This must happen in a second pass, since we rely on the delta
	 * information for the whole list being completed.
	 */
	for (i = 0; i < to_pack.nr_objects; i++)
		break_delta_chains(&to_pack.objects[i]);

	free(sorted_by_offset);
}

/*
 * We search for deltas in a list sorted by type, by filename hash, and then
 * by size, so that we see progressively smaller and smaller files.
 * That's because we prefer deltas to be from the bigger file
 * to the smaller -- deletes are potentially cheaper, but perhaps
 * more importantly, the bigger file is likely the more recent
 * one.  The deepest deltas are therefore the oldest objects, which are
 * less likely to be accessed often.
 */
static int type_size_sort(const void *_a, const void *_b)
{
	const struct object_entry *a = *(struct object_entry **)_a;
	const struct object_entry *b = *(struct object_entry **)_b;

	if (a->type > b->type)
		return -1;
	if (a->type < b->type)
		return 1;
	if (a->hash > b->hash)
		return -1;
	if (a->hash < b->hash)
		return 1;
	if (a->preferred_base > b->preferred_base)
		return -1;
	if (a->preferred_base < b->preferred_base)
		return 1;
	if (a->size > b->size)
		return -1;
	if (a->size < b->size)
		return 1;
	return a < b ? -1 : (a > b);  /* newest first */
}

struct unpacked {
	struct object_entry *entry;
	void *data;
	struct delta_index *index;
	unsigned depth;
};

static int delta_cacheable(unsigned long src_size, unsigned long trg_size,
			   unsigned long delta_size)
{
	if (max_delta_cache_size && delta_cache_size + delta_size > max_delta_cache_size)
		return 0;

	if (delta_size < cache_max_small_delta_size)
		return 1;

	/* cache delta, if objects are large enough compared to delta size */
	if ((src_size >> 20) + (trg_size >> 21) > (delta_size >> 10))
		return 1;

	return 0;
}

#ifndef NO_PTHREADS

static pthread_mutex_t read_mutex;
#define read_lock()		pthread_mutex_lock(&read_mutex)
#define read_unlock()		pthread_mutex_unlock(&read_mutex)

static pthread_mutex_t cache_mutex;
#define cache_lock()		pthread_mutex_lock(&cache_mutex)
#define cache_unlock()		pthread_mutex_unlock(&cache_mutex)

static pthread_mutex_t progress_mutex;
#define progress_lock()		pthread_mutex_lock(&progress_mutex)
#define progress_unlock()	pthread_mutex_unlock(&progress_mutex)

#else

#define read_lock()		(void)0
#define read_unlock()		(void)0
#define cache_lock()		(void)0
#define cache_unlock()		(void)0
#define progress_lock()		(void)0
#define progress_unlock()	(void)0

#endif

static int try_delta(struct unpacked *trg, struct unpacked *src,
		     unsigned max_depth, unsigned long *mem_usage)
{
	struct object_entry *trg_entry = trg->entry;
	struct object_entry *src_entry = src->entry;
	unsigned long trg_size, src_size, delta_size, sizediff, max_size, sz;
	unsigned ref_depth;
	enum object_type type;
	void *delta_buf;

	/* Don't bother doing diffs between different types */
	if (trg_entry->type != src_entry->type)
		return -1;

	/*
	 * We do not bother to try a delta that we discarded on an
	 * earlier try, but only when reusing delta data.  Note that
	 * src_entry that is marked as the preferred_base should always
	 * be considered, as even if we produce a suboptimal delta against
	 * it, we will still save the transfer cost, as we already know
	 * the other side has it and we won't send src_entry at all.
	 */
	if (reuse_delta && trg_entry->in_pack &&
	    trg_entry->in_pack == src_entry->in_pack &&
	    !src_entry->preferred_base &&
	    trg_entry->in_pack_type != OBJ_REF_DELTA &&
	    trg_entry->in_pack_type != OBJ_OFS_DELTA)
		return 0;

	/* Let's not bust the allowed depth. */
	if (src->depth >= max_depth)
		return 0;

	/* Now some size filtering heuristics. */
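	/*
	 * A candidate delta is only worth keeping if it is smaller than what
	 * we already have for the target (or than roughly half the target
	 * size if we have no delta yet), and that budget shrinks further
	 * when the prospective base already sits deep in a chain, so deeper
	 * candidate bases must produce proportionally smaller deltas to be
	 * accepted.
	 */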
	trg_size = trg_entry->size;
	if (!trg_entry->delta) {
		max_size = trg_size/2 - 20;
		ref_depth = 1;
	} else {
		max_size = trg_entry->delta_size;
		ref_depth = trg->depth;
	}
	max_size = (uint64_t)max_size * (max_depth - src->depth) /
						(max_depth - ref_depth + 1);
	if (max_size == 0)
		return 0;
	src_size = src_entry->size;
	sizediff = src_size < trg_size ? trg_size - src_size : 0;
	if (sizediff >= max_size)
		return 0;
	if (trg_size < src_size / 32)
		return 0;

	/* Load data if not already done */
	if (!trg->data) {
		read_lock();
		trg->data = read_sha1_file(trg_entry->idx.sha1, &type, &sz);
		read_unlock();
		if (!trg->data)
			die("object %s cannot be read",
			    sha1_to_hex(trg_entry->idx.sha1));
		if (sz != trg_size)
			die("object %s inconsistent object length (%lu vs %lu)",
			    sha1_to_hex(trg_entry->idx.sha1), sz, trg_size);
		*mem_usage += sz;
	}
	if (!src->data) {
		read_lock();
		src->data = read_sha1_file(src_entry->idx.sha1, &type, &sz);
		read_unlock();
		if (!src->data) {
			if (src_entry->preferred_base) {
				static int warned = 0;
				if (!warned++)
					warning("object %s cannot be read",
						sha1_to_hex(src_entry->idx.sha1));
				/*
				 * Those objects are not included in the
				 * resulting pack.  Be resilient and ignore
				 * them if they can't be read, in case the
				 * pack could be created nevertheless.
				 */
				return 0;
			}
			die("object %s cannot be read",
			    sha1_to_hex(src_entry->idx.sha1));
		}
		if (sz != src_size)
			die("object %s inconsistent object length (%lu vs %lu)",
			    sha1_to_hex(src_entry->idx.sha1), sz, src_size);
		*mem_usage += sz;
	}
	if (!src->index) {
		src->index = create_delta_index(src->data, src_size);
		if (!src->index) {
			static int warned = 0;
			if (!warned++)
				warning("suboptimal pack - out of memory");
			return 0;
		}
		*mem_usage += sizeof_delta_index(src->index);
	}

	delta_buf = create_delta(src->index, trg->data, trg_size, &delta_size, max_size);
	if (!delta_buf)
		return 0;

	if (trg_entry->delta) {
		/* Prefer only shallower same-sized deltas. */
		if (delta_size == trg_entry->delta_size &&
		    src->depth + 1 >= trg->depth) {
			free(delta_buf);
			return 0;
		}
	}

	/*
	 * Handle memory allocation outside of the cache
	 * accounting lock.  Compiler will optimize the strangeness
	 * away when NO_PTHREADS is defined.
	 */
	free(trg_entry->delta_data);
	cache_lock();
	if (trg_entry->delta_data) {
		delta_cache_size -= trg_entry->delta_size;
		trg_entry->delta_data = NULL;
	}
	if (delta_cacheable(src_size, trg_size, delta_size)) {
		delta_cache_size += delta_size;
		cache_unlock();
		trg_entry->delta_data = xrealloc(delta_buf, delta_size);
	} else {
		cache_unlock();
		free(delta_buf);
	}

	trg_entry->delta = src_entry;
	trg_entry->delta_size = delta_size;
	trg->depth = src->depth + 1;

	return 1;
}

static unsigned int check_delta_limit(struct object_entry *me, unsigned int n)
{
	struct object_entry *child = me->delta_child;
	unsigned int m = n;
	while (child) {
		unsigned int c = check_delta_limit(child, n + 1);
		if (m < c)
			m = c;
		child = child->delta_sibling;
	}
	return m;
}

static unsigned long free_unpacked(struct unpacked *n)
{
	unsigned long freed_mem = sizeof_delta_index(n->index);
	free_delta_index(n->index);
	n->index = NULL;
	if (n->data) {
		freed_mem += n->entry->size;
		free(n->data);
		n->data = NULL;
	}
	n->entry = NULL;
	n->depth = 0;
	return freed_mem;
}

static void find_deltas(struct object_entry **list, unsigned *list_size,
			int window, int depth, unsigned *processed)
{
	uint32_t i, idx = 0, count = 0;
	struct unpacked *array;
	unsigned long mem_usage = 0;

	array = xcalloc(window, sizeof(struct unpacked));

	for (;;) {
		struct object_entry *entry;
		struct unpacked *n = array + idx;
		int j, max_depth, best_base = -1;

		progress_lock();
		if (!*list_size) {
			progress_unlock();
			break;
		}
		entry = *list++;
		(*list_size)--;
		if (!entry->preferred_base) {
			(*processed)++;
			display_progress(progress_state, *processed);
		}
		progress_unlock();

		mem_usage -= free_unpacked(n);
		n->entry = entry;

		while (window_memory_limit &&
		       mem_usage > window_memory_limit &&
		       count > 1) {
			uint32_t tail = (idx + window - count) % window;
			mem_usage -= free_unpacked(array + tail);
			count--;
		}

		/* We do not compute delta to *create* objects we are not
		 * going to pack.
		 */
		if (entry->preferred_base)
			goto next;

		/*
		 * If the current object is at pack edge, take the depth of
		 * the objects that depend on the current object into account;
		 * otherwise they would become too deep.
		 */
		max_depth = depth;
		if (entry->delta_child) {
			max_depth -= check_delta_limit(entry, 0);
			if (max_depth <= 0)
				goto next;
		}

		j = window;
		while (--j > 0) {
			int ret;
			uint32_t other_idx = idx + j;
			struct unpacked *m;
			if (other_idx >= window)
				other_idx -= window;
			m = array + other_idx;
			if (!m->entry)
				break;
			ret = try_delta(n, m, max_depth, &mem_usage);
			if (ret < 0)
				break;
			else if (ret > 0)
				best_base = other_idx;
		}

		/*
		 * If we decided to cache the delta data, then it is best
		 * to compress it right away.  First because we have to do
		 * it anyway, and doing it here while we're threaded will
		 * save a lot of time in the non-threaded write phase,
		 * as well as allow for caching more deltas within
		 * the same cache size limit.
		 * ...
		 * But only if not writing to stdout, since in that case
		 * the network is most likely throttling writes anyway,
		 * and therefore it is best to go to the write phase ASAP
		 * instead, as we can afford spending more time compressing
		 * between writes at that moment.
		 */
		if (entry->delta_data && !pack_to_stdout) {
			entry->z_delta_size = do_compress(&entry->delta_data,
							  entry->delta_size);
			cache_lock();
			delta_cache_size -= entry->delta_size;
			delta_cache_size += entry->z_delta_size;
			cache_unlock();
		}

		/* if we made n a delta, and if n is already at max
		 * depth, leaving it in the window is pointless.  we
		 * should evict it first.
		 */
		if (entry->delta && max_depth <= n->depth)
			continue;

		/*
		 * Move the best delta base up in the window, after the
		 * currently deltified object, to keep it longer.  It will
		 * be the first base object to be attempted next.
		 */
		if (entry->delta) {
			struct unpacked swap = array[best_base];
			int dist = (window + idx - best_base) % window;
			int dst = best_base;
			while (dist--) {
				int src = (dst + 1) % window;
				array[dst] = array[src];
				dst = src;
			}
			array[dst] = swap;
		}

		next:
		idx++;
		if (count + 1 < window)
			count++;
		if (idx >= window)
			idx = 0;
	}

	for (i = 0; i < window; ++i) {
		free_delta_index(array[i].index);
		free(array[i].data);
	}
	free(array);
}

#ifndef NO_PTHREADS

static void try_to_free_from_threads(size_t size)
{
	read_lock();
	release_pack_memory(size);
	read_unlock();
}

static try_to_free_t old_try_to_free_routine;

/*
 * The main thread waits on the condition that (at least) one of the workers
 * has stopped working (which is indicated in the .working member of
 * struct thread_params).
 * When a work thread has completed its work, it sets .working to 0 and
 * signals the main thread and waits on the condition that .data_ready
 * becomes 1.
 */

struct thread_params {
	pthread_t thread;
	struct object_entry **list;
	unsigned list_size;
	unsigned remaining;
	int window;
	int depth;
	int working;
	int data_ready;
	pthread_mutex_t mutex;
	pthread_cond_t cond;
	unsigned *processed;
};

static pthread_cond_t progress_cond;

/*
 * Mutex and condition variable can't be statically-initialized on Windows.
 */
static void init_threaded_search(void)
{
	init_recursive_mutex(&read_mutex);
	pthread_mutex_init(&cache_mutex, NULL);
	pthread_mutex_init(&progress_mutex, NULL);
	pthread_cond_init(&progress_cond, NULL);
	old_try_to_free_routine = set_try_to_free_routine(try_to_free_from_threads);
}

static void cleanup_threaded_search(void)
{
	set_try_to_free_routine(old_try_to_free_routine);
	pthread_cond_destroy(&progress_cond);
	pthread_mutex_destroy(&read_mutex);
	pthread_mutex_destroy(&cache_mutex);
	pthread_mutex_destroy(&progress_mutex);
}

static void *threaded_find_deltas(void *arg)
{
	struct thread_params *me = arg;

	while (me->remaining) {
		find_deltas(me->list, &me->remaining,
			    me->window, me->depth, me->processed);

		progress_lock();
		me->working = 0;
static void *threaded_find_deltas(void *arg)
{
	struct thread_params *me = arg;

	while (me->remaining) {
		find_deltas(me->list, &me->remaining,
			    me->window, me->depth, me->processed);

		progress_lock();
		me->working = 0;
		pthread_cond_signal(&progress_cond);
		progress_unlock();

		/*
		 * We must not set ->data_ready before we wait on the
		 * condition because the main thread may have set it to 1
		 * before we get here.  In order to be sure that new
		 * work is available if we see 1 in ->data_ready, it
		 * was initialized to 0 before this thread was spawned
		 * and we reset it to 0 right away.
		 */
		pthread_mutex_lock(&me->mutex);
		while (!me->data_ready)
			pthread_cond_wait(&me->cond, &me->mutex);
		me->data_ready = 0;
		pthread_mutex_unlock(&me->mutex);
	}
	/* leave ->working 1 so that this doesn't get more work assigned */
	return NULL;
}

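/*
 * Top-level threaded delta search: partition the object list into one
 * chunk per thread (trying to keep objects with the same name hash in
 * the same chunk), start the workers, and then keep redistributing work:
 * whenever a thread goes idle it is given half of the remaining objects
 * of the busiest thread, until nothing is left worth splitting.
 */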
static void ll_find_deltas(struct object_entry **list, unsigned list_size,
			   int window, int depth, unsigned *processed)
{
	struct thread_params *p;
	int i, ret, active_threads = 0;

	init_threaded_search();

	if (delta_search_threads <= 1) {
		find_deltas(list, &list_size, window, depth, processed);
		cleanup_threaded_search();
		return;
	}
	if (progress > pack_to_stdout)
		fprintf(stderr, "Delta compression using up to %d threads.\n",
			delta_search_threads);
	p = xcalloc(delta_search_threads, sizeof(*p));

	/* Partition the work amongst work threads. */
	for (i = 0; i < delta_search_threads; i++) {
		unsigned sub_size = list_size / (delta_search_threads - i);

		/* don't use too small segments or no deltas will be found */
		if (sub_size < 2*window && i+1 < delta_search_threads)
			sub_size = 0;

		p[i].window = window;
		p[i].depth = depth;
		p[i].processed = processed;
		p[i].working = 1;
		p[i].data_ready = 0;

		/* try to split chunks on "path" boundaries */
		while (sub_size && sub_size < list_size &&
		       list[sub_size]->hash &&
		       list[sub_size]->hash == list[sub_size-1]->hash)
			sub_size++;

		p[i].list = list;
		p[i].list_size = sub_size;
		p[i].remaining = sub_size;

		list += sub_size;
		list_size -= sub_size;
	}

	/* Start work threads. */
	for (i = 0; i < delta_search_threads; i++) {
		if (!p[i].list_size)
			continue;
		pthread_mutex_init(&p[i].mutex, NULL);
		pthread_cond_init(&p[i].cond, NULL);
		ret = pthread_create(&p[i].thread, NULL,
				     threaded_find_deltas, &p[i]);
		if (ret)
			die("unable to create thread: %s", strerror(ret));
		active_threads++;
	}

	/*
	 * Now let's wait for work completion.  Each time a thread is done
	 * with its work, we steal half of the remaining work from the
	 * thread with the largest number of unprocessed objects and give
	 * it to that newly idle thread.  This ensures good load balancing
	 * until the remaining object list segments are simply too short
	 * to be worth splitting anymore.
	 */
	while (active_threads) {
		struct thread_params *target = NULL;
		struct thread_params *victim = NULL;
		unsigned sub_size = 0;

		progress_lock();
		for (;;) {
			for (i = 0; !target && i < delta_search_threads; i++)
				if (!p[i].working)
					target = &p[i];
			if (target)
				break;
			pthread_cond_wait(&progress_cond, &progress_mutex);
		}

		for (i = 0; i < delta_search_threads; i++)
			if (p[i].remaining > 2*window &&
			    (!victim || victim->remaining < p[i].remaining))
				victim = &p[i];
		if (victim) {
			sub_size = victim->remaining / 2;
			list = victim->list + victim->list_size - sub_size;
			while (sub_size && list[0]->hash &&
			       list[0]->hash == list[-1]->hash) {
				list++;
				sub_size--;
			}
			if (!sub_size) {
				/*
				 * It is possible for some "paths" to have
				 * so many objects that no hash boundary
				 * might be found.  Let's just steal the
				 * exact half in that case.
				 */
				sub_size = victim->remaining / 2;
				list -= sub_size;
			}
			target->list = list;
			victim->list_size -= sub_size;
			victim->remaining -= sub_size;
		}
		target->list_size = sub_size;
		target->remaining = sub_size;
		target->working = 1;
		progress_unlock();

		pthread_mutex_lock(&target->mutex);
		target->data_ready = 1;
		pthread_cond_signal(&target->cond);
		pthread_mutex_unlock(&target->mutex);

		if (!sub_size) {
			pthread_join(target->thread, NULL);
			pthread_cond_destroy(&target->cond);
			pthread_mutex_destroy(&target->mutex);
			active_threads--;
		}
	}
	cleanup_threaded_search();
	free(p);
}

#else
#define ll_find_deltas(l, s, w, d, p)	find_deltas(l, &s, w, d, p)
#endif

static void add_tag_chain(const struct object_id *oid)
{
	struct tag *tag;

	/*
	 * We catch duplicates already in add_object_entry(), but we'd
	 * prefer to do this extra check to avoid having to parse the
	 * tag at all if we already know that it's being packed (e.g., if
	 * it was included via bitmaps, we would not have parsed it
	 * previously).
	 */
	if (packlist_find(&to_pack, oid->hash, NULL))
		return;

	tag = lookup_tag(oid->hash);
	while (1) {
		if (!tag || parse_tag(tag) || !tag->tagged)
			die("unable to pack objects reachable from tag %s",
			    oid_to_hex(oid));

		add_object_entry(tag->object.oid.hash, OBJ_TAG, NULL, 0);

		if (tag->tagged->type != OBJ_TAG)
			return;

		tag = (struct tag *)tag->tagged;
	}
}

static int add_ref_tag(const char *path, const struct object_id *oid, int flag, void *cb_data)
{
	struct object_id peeled;

	if (starts_with(path, "refs/tags/") && /* is a tag? */
	    !peel_ref(path, peeled.hash)    && /* peelable? */
	    packlist_find(&to_pack, peeled.hash, NULL)) /* object packed? */
		add_tag_chain(oid);
	return 0;
}

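/*
 * Gather the details of every object we are about to pack, pick the
 * entries that are worth searching deltas for (skipping very small
 * objects, entries whose existing delta is being reused, and entries
 * marked no_try_delta), and run the delta search over them before the
 * writing phase starts.
 */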
static void prepare_pack(int window, int depth)
{
	struct object_entry **delta_list;
	uint32_t i, nr_deltas;
	unsigned n;

	get_object_details();

	/*
	 * If we're locally repacking then we need to be doubly careful
	 * from now on in order to make sure no stealth corruption gets
	 * propagated to the new pack.  Clients receiving streamed packs
	 * should validate everything they get anyway so no need to incur
	 * the additional cost here in that case.
	 */
	if (!pack_to_stdout)
		do_check_packed_object_crc = 1;

	if (!to_pack.nr_objects || !window || !depth)
		return;

	ALLOC_ARRAY(delta_list, to_pack.nr_objects);
	nr_deltas = n = 0;

	for (i = 0; i < to_pack.nr_objects; i++) {
		struct object_entry *entry = to_pack.objects + i;

		if (entry->delta)
			/* This happens if we decided to reuse existing
			 * delta from a pack.  "reuse_delta &&" is implied.
			 */
			continue;

		if (entry->size < 50)
			continue;

		if (entry->no_try_delta)
			continue;

		if (!entry->preferred_base) {
			nr_deltas++;
			if (entry->type < 0)
				die("unable to get type of object %s",
				    sha1_to_hex(entry->idx.sha1));
		} else {
			if (entry->type < 0) {
				/*
				 * This object is not found, but we
				 * don't have to include it anyway.
				 */
				continue;
			}
		}

		delta_list[n++] = entry;
	}

	if (nr_deltas && n > 1) {
		unsigned nr_done = 0;
		if (progress)
			progress_state = start_progress(_("Compressing objects"),
							nr_deltas);
		QSORT(delta_list, n, type_size_sort);
		ll_find_deltas(delta_list, n, window+1, depth, &nr_done);
		stop_progress(&progress_state);
		if (nr_done != nr_deltas)
			die("inconsistency with delta count");
	}
	free(delta_list);
}

static int git_pack_config(const char *k, const char *v, void *cb)
{
	if (!strcmp(k, "pack.window")) {
		window = git_config_int(k, v);
		return 0;
	}
	if (!strcmp(k, "pack.windowmemory")) {
		window_memory_limit = git_config_ulong(k, v);
		return 0;
	}
	if (!strcmp(k, "pack.depth")) {
		depth = git_config_int(k, v);
		return 0;
	}
	if (!strcmp(k, "pack.deltacachesize")) {
		max_delta_cache_size = git_config_int(k, v);
		return 0;
	}
	if (!strcmp(k, "pack.deltacachelimit")) {
		cache_max_small_delta_size = git_config_int(k, v);
		return 0;
	}
	if (!strcmp(k, "pack.writebitmaphashcache")) {
		if (git_config_bool(k, v))
			write_bitmap_options |= BITMAP_OPT_HASH_CACHE;
		else
			write_bitmap_options &= ~BITMAP_OPT_HASH_CACHE;
	}
	if (!strcmp(k, "pack.usebitmaps")) {
		use_bitmap_index_default = git_config_bool(k, v);
		return 0;
	}
	if (!strcmp(k, "pack.threads")) {
		delta_search_threads = git_config_int(k, v);
		if (delta_search_threads < 0)
			die("invalid number of threads specified (%d)",
			    delta_search_threads);
#ifdef NO_PTHREADS
		if (delta_search_threads != 1)
			warning("no threads support, ignoring %s", k);
#endif
		return 0;
	}
	if (!strcmp(k, "pack.indexversion")) {
		pack_idx_opts.version = git_config_int(k, v);
		if (pack_idx_opts.version > 2)
			die("bad pack.indexversion=%"PRIu32,
			    pack_idx_opts.version);
		return 0;
	}
	return git_default_config(k, v, cb);
}

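/*
 * Read the object list from stdin, one object per line: a 40-hex SHA-1,
 * optionally followed by a space and the path name that is used as the
 * delta grouping hint, e.g.
 *
 *   <40-hex sha1> Documentation/git.txt
 *   -<40-hex sha1>
 *
 * (the hashes above are placeholders, not real object names).  A leading
 * '-' marks an edge object that may serve as a preferred delta base but
 * is not itself written into the pack.
 */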
static void read_object_list_from_stdin(void)
{
	char line[40 + 1 + PATH_MAX + 2];
	unsigned char sha1[20];

	for (;;) {
		if (!fgets(line, sizeof(line), stdin)) {
			if (feof(stdin))
				break;
			if (!ferror(stdin))
				die("fgets returned NULL, not EOF, not error!");
			if (errno != EINTR)
				die_errno("fgets");
			clearerr(stdin);
			continue;
		}
		if (line[0] == '-') {
			if (get_sha1_hex(line+1, sha1))
				die("expected edge sha1, got garbage:\n %s",
				    line);
			add_preferred_base(sha1);
			continue;
		}
		if (get_sha1_hex(line, sha1))
			die("expected sha1, got garbage:\n %s", line);

		add_preferred_base_object(line+41);
		add_object_entry(sha1, 0, line+41, 0);
	}
}

#define OBJECT_ADDED (1u<<20)

static void show_commit(struct commit *commit, void *data)
{
	add_object_entry(commit->object.oid.hash, OBJ_COMMIT, NULL, 0);
	commit->object.flags |= OBJECT_ADDED;

	if (write_bitmap_index)
		index_commit_for_bitmap(commit);
}

static void show_object(struct object *obj, const char *name, void *data)
{
	add_preferred_base_object(name);
	add_object_entry(obj->oid.hash, obj->type, name, 0);
	obj->flags |= OBJECT_ADDED;
}

static void show_edge(struct commit *commit)
{
	add_preferred_base(commit->object.oid.hash);
}

struct in_pack_object {
	off_t offset;
	struct object *object;
};

struct in_pack {
	int alloc;
	int nr;
	struct in_pack_object *array;
};

static void mark_in_pack_object(struct object *object, struct packed_git *p, struct in_pack *in_pack)
{
	in_pack->array[in_pack->nr].offset = find_pack_entry_one(object->oid.hash, p);
	in_pack->array[in_pack->nr].object = object;
	in_pack->nr++;
}

/*
 * Compare the objects in the offset order, in order to emulate the
 * "git rev-list --objects" output that produced the pack originally.
 */
static int ofscmp(const void *a_, const void *b_)
{
	struct in_pack_object *a = (struct in_pack_object *)a_;
	struct in_pack_object *b = (struct in_pack_object *)b_;

	if (a->offset < b->offset)
		return -1;
	else if (a->offset > b->offset)
		return 1;
	else
		return oidcmp(&a->object->oid, &b->object->oid);
}

static void add_objects_in_unpacked_packs(struct rev_info *revs)
{
	struct packed_git *p;
	struct in_pack in_pack;
	uint32_t i;

	memset(&in_pack, 0, sizeof(in_pack));

	for (p = packed_git; p; p = p->next) {
		const unsigned char *sha1;
		struct object *o;

		if (!p->pack_local || p->pack_keep)
			continue;
		if (open_pack_index(p))
			die("cannot open pack index");

		ALLOC_GROW(in_pack.array,
			   in_pack.nr + p->num_objects,
			   in_pack.alloc);

		for (i = 0; i < p->num_objects; i++) {
			sha1 = nth_packed_object_sha1(p, i);
			o = lookup_unknown_object(sha1);
			if (!(o->flags & OBJECT_ADDED))
				mark_in_pack_object(o, p, &in_pack);
			o->flags |= OBJECT_ADDED;
		}
	}

	if (in_pack.nr) {
		QSORT(in_pack.array, in_pack.nr, ofscmp);
		for (i = 0; i < in_pack.nr; i++) {
			struct object *o = in_pack.array[i].object;
			add_object_entry(o->oid.hash, o->type, "", 0);
		}
	}
	free(in_pack.array);
}

static int add_loose_object(const struct object_id *oid, const char *path,
			    void *data)
{
	enum object_type type = sha1_object_info(oid->hash, NULL);

	if (type < 0) {
		warning("loose object at %s could not be examined", path);
		return 0;
	}

	add_object_entry(oid->hash, type, "", 0);
	return 0;
}

/*
 * We actually don't even have to worry about reachability here.
 * add_object_entry will weed out duplicates, so we just add every
 * loose object we find.
 */
static void add_unreachable_loose_objects(void)
{
	for_each_loose_file_in_objdir(get_object_directory(),
				      add_loose_object,
				      NULL, NULL, NULL);
}

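/*
 * Check whether the object lives in a pack we would not repack: either a
 * non-local pack or one protected by a .keep file.  The last pack that
 * gave a hit is remembered in a static pointer and tried first, since
 * consecutive lookups tend to hit the same pack.
 */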
static int has_sha1_pack_kept_or_nonlocal(const unsigned char *sha1)
{
	static struct packed_git *last_found = (void *)1;
	struct packed_git *p;

	p = (last_found != (void *)1) ? last_found : packed_git;

	while (p) {
		if ((!p->pack_local || p->pack_keep) &&
			find_pack_entry_one(sha1, p)) {
			last_found = p;
			return 1;
		}
		if (p == last_found)
			p = packed_git;
		else
			p = p->next;
		if (p == last_found)
			p = p->next;
	}
	return 0;
}

/*
 * Store a list of sha1s that should not be discarded because they are
 * either written too recently, or are reachable from another object
 * that was.
 *
 * This is filled by get_object_list.
 */
static struct oid_array recent_objects;

static int loosened_object_can_be_discarded(const struct object_id *oid,
					    unsigned long mtime)
{
	if (!unpack_unreachable_expiration)
		return 0;
	if (mtime > unpack_unreachable_expiration)
		return 0;
	if (oid_array_lookup(&recent_objects, oid) >= 0)
		return 0;
	return 1;
}

static void loosen_unused_packed_objects(struct rev_info *revs)
{
	struct packed_git *p;
	uint32_t i;
	struct object_id oid;

	for (p = packed_git; p; p = p->next) {
		if (!p->pack_local || p->pack_keep)
			continue;

		if (open_pack_index(p))
			die("cannot open pack index");

		for (i = 0; i < p->num_objects; i++) {
			nth_packed_object_oid(&oid, p, i);
			if (!packlist_find(&to_pack, oid.hash, NULL) &&
			    !has_sha1_pack_kept_or_nonlocal(oid.hash) &&
			    !loosened_object_can_be_discarded(&oid, p->mtime))
				if (force_object_loose(oid.hash, p->mtime))
					die("unable to force loose object");
		}
	}
}

/*
 * This tracks any options which pack-reuse code expects to be on, or which a
 * reader of the pack might not understand, and which would therefore prevent
 * blind reuse of what we have on disk.
 */
static int pack_options_allow_reuse(void)
{
	return pack_to_stdout && allow_ofs_delta;
}

static int get_object_list_from_bitmap(struct rev_info *revs)
{
	if (prepare_bitmap_walk(revs) < 0)
		return -1;

	if (pack_options_allow_reuse() &&
	    !reuse_partial_packfile_from_bitmap(
			&reuse_packfile,
			&reuse_packfile_objects,
			&reuse_packfile_offset)) {
		assert(reuse_packfile_objects);
		nr_result += reuse_packfile_objects;
		display_progress(progress_state, nr_result);
	}

	traverse_bitmap_commit_list(&add_object_entry_from_bitmap);
	return 0;
}

static void record_recent_object(struct object *obj,
				 const char *name,
				 void *data)
{
	oid_array_append(&recent_objects, &obj->oid);
}

static void record_recent_commit(struct commit *commit, void *data)
{
	oid_array_append(&recent_objects, &commit->object.oid);
}

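/*
 * Run the internal rev-list: read additional revision arguments from
 * stdin until a blank line (handling "--not" and "--shallow <sha1>"
 * specially), then either walk the bitmap index or do a full commit
 * traversal, and finally pick up unreachable objects as requested by
 * --keep-unreachable, --pack-loose-unreachable and --unpack-unreachable.
 */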
static void get_object_list(int ac, const char **av)
{
	struct rev_info revs;
	char line[1000];
	int flags = 0;

	init_revisions(&revs, NULL);
	save_commit_buffer = 0;
	setup_revisions(ac, av, &revs, NULL);

	/* make sure shallows are read */
	is_repository_shallow();

	while (fgets(line, sizeof(line), stdin) != NULL) {
		int len = strlen(line);
		if (len && line[len - 1] == '\n')
			line[--len] = 0;
		if (!len)
			break;
		if (*line == '-') {
			if (!strcmp(line, "--not")) {
				flags ^= UNINTERESTING;
				write_bitmap_index = 0;
				continue;
			}
			if (starts_with(line, "--shallow ")) {
				unsigned char sha1[20];
				if (get_sha1_hex(line + 10, sha1))
					die("not an SHA-1 '%s'", line + 10);
				register_shallow(sha1);
				use_bitmap_index = 0;
				continue;
			}
			die("not a rev '%s'", line);
		}
		if (handle_revision_arg(line, &revs, flags, REVARG_CANNOT_BE_FILENAME))
			die("bad revision '%s'", line);
	}

	if (use_bitmap_index && !get_object_list_from_bitmap(&revs))
		return;

	if (prepare_revision_walk(&revs))
		die("revision walk setup failed");
	mark_edges_uninteresting(&revs, show_edge);
	traverse_commit_list(&revs, show_commit, show_object, NULL);

	if (unpack_unreachable_expiration) {
		revs.ignore_missing_links = 1;
		if (add_unseen_recent_objects_to_traversal(&revs,
				unpack_unreachable_expiration))
			die("unable to add recent objects");
		if (prepare_revision_walk(&revs))
			die("revision walk setup failed");
		traverse_commit_list(&revs, record_recent_commit,
				     record_recent_object, NULL);
	}

	if (keep_unreachable)
		add_objects_in_unpacked_packs(&revs);
	if (pack_loose_unreachable)
		add_unreachable_loose_objects();
	if (unpack_unreachable)
		loosen_unused_packed_objects(&revs);

	oid_array_clear(&recent_objects);
}

static int option_parse_index_version(const struct option *opt,
				      const char *arg, int unset)
{
	char *c;
	const char *val = arg;
	pack_idx_opts.version = strtoul(val, &c, 10);
	if (pack_idx_opts.version > 2)
		die(_("unsupported index version %s"), val);
	if (*c == ',' && c[1])
		pack_idx_opts.off32_limit = strtoul(c+1, &c, 0);
	if (*c || pack_idx_opts.off32_limit & 0x80000000)
		die(_("bad index version '%s'"), val);
	return 0;
}

static int option_parse_unpack_unreachable(const struct option *opt,
					   const char *arg, int unset)
{
	if (unset) {
		unpack_unreachable = 0;
		unpack_unreachable_expiration = 0;
	}
	else {
		unpack_unreachable = 1;
		if (arg)
			unpack_unreachable_expiration = approxidate(arg);
	}
	return 0;
}

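/*
 * Entry point.  The overall flow is: parse options and configuration,
 * build the list of objects to pack (either from the list fed on stdin
 * or via the internal rev-list machinery), run the delta search in
 * prepare_pack(), and finally write the pack (and, when not streaming
 * to stdout, its index) in write_pack_file().
 */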
int cmd_pack_objects(int argc, const char **argv, const char *prefix)
{
	int use_internal_rev_list = 0;
	int thin = 0;
	int shallow = 0;
	int all_progress_implied = 0;
	struct argv_array rp = ARGV_ARRAY_INIT;
	int rev_list_unpacked = 0, rev_list_all = 0, rev_list_reflog = 0;
	int rev_list_index = 0;
	struct option pack_objects_options[] = {
		OPT_SET_INT('q', "quiet", &progress,
			    N_("do not show progress meter"), 0),
		OPT_SET_INT(0, "progress", &progress,
			    N_("show progress meter"), 1),
		OPT_SET_INT(0, "all-progress", &progress,
			    N_("show progress meter during object writing phase"), 2),
		OPT_BOOL(0, "all-progress-implied",
			 &all_progress_implied,
			 N_("similar to --all-progress when progress meter is shown")),
		{ OPTION_CALLBACK, 0, "index-version", NULL, N_("version[,offset]"),
		  N_("write the pack index file in the specified idx format version"),
		  0, option_parse_index_version },
		OPT_MAGNITUDE(0, "max-pack-size", &pack_size_limit,
			      N_("maximum size of each output pack file")),
		OPT_BOOL(0, "local", &local,
			 N_("ignore borrowed objects from alternate object store")),
		OPT_BOOL(0, "incremental", &incremental,
			 N_("ignore packed objects")),
		OPT_INTEGER(0, "window", &window,
			    N_("limit pack window by objects")),
		OPT_MAGNITUDE(0, "window-memory", &window_memory_limit,
			      N_("limit pack window by memory in addition to object limit")),
		OPT_INTEGER(0, "depth", &depth,
			    N_("maximum length of delta chain allowed in the resulting pack")),
		OPT_BOOL(0, "reuse-delta", &reuse_delta,
			 N_("reuse existing deltas")),
		OPT_BOOL(0, "reuse-object", &reuse_object,
			 N_("reuse existing objects")),
		OPT_BOOL(0, "delta-base-offset", &allow_ofs_delta,
			 N_("use OFS_DELTA objects")),
		OPT_INTEGER(0, "threads", &delta_search_threads,
			    N_("use threads when searching for best delta matches")),
		OPT_BOOL(0, "non-empty", &non_empty,
			 N_("do not create an empty pack output")),
		OPT_BOOL(0, "revs", &use_internal_rev_list,
			 N_("read revision arguments from standard input")),
		{ OPTION_SET_INT, 0, "unpacked", &rev_list_unpacked, NULL,
		  N_("limit the objects to those that are not yet packed"),
		  PARSE_OPT_NOARG | PARSE_OPT_NONEG, NULL, 1 },
		{ OPTION_SET_INT, 0, "all", &rev_list_all, NULL,
		  N_("include objects reachable from any reference"),
		  PARSE_OPT_NOARG | PARSE_OPT_NONEG, NULL, 1 },
		{ OPTION_SET_INT, 0, "reflog", &rev_list_reflog, NULL,
		  N_("include objects referred by reflog entries"),
		  PARSE_OPT_NOARG | PARSE_OPT_NONEG, NULL, 1 },
		{ OPTION_SET_INT, 0, "indexed-objects", &rev_list_index, NULL,
		  N_("include objects referred to by the index"),
		  PARSE_OPT_NOARG | PARSE_OPT_NONEG, NULL, 1 },
		OPT_BOOL(0, "stdout", &pack_to_stdout,
			 N_("output pack to stdout")),
		OPT_BOOL(0, "include-tag", &include_tag,
			 N_("include tag objects that refer to objects to be packed")),
		OPT_BOOL(0, "keep-unreachable", &keep_unreachable,
			 N_("keep unreachable objects")),
		OPT_BOOL(0, "pack-loose-unreachable", &pack_loose_unreachable,
			 N_("pack loose unreachable objects")),
		{ OPTION_CALLBACK, 0, "unpack-unreachable", NULL, N_("time"),
		  N_("unpack unreachable objects newer than <time>"),
		  PARSE_OPT_OPTARG, option_parse_unpack_unreachable },
		OPT_BOOL(0, "thin", &thin,
			 N_("create thin packs")),
		OPT_BOOL(0, "shallow", &shallow,
			 N_("create packs suitable for shallow fetches")),
		OPT_BOOL(0, "honor-pack-keep", &ignore_packed_keep,
			 N_("ignore packs that have companion .keep file")),
		OPT_INTEGER(0, "compression", &pack_compression_level,
			    N_("pack compression level")),
		OPT_SET_INT(0, "keep-true-parents", &grafts_replace_parents,
			    N_("do not hide commits by grafts"), 0),
		OPT_BOOL(0, "use-bitmap-index", &use_bitmap_index,
			 N_("use a bitmap index if available to speed up counting objects")),
		OPT_BOOL(0, "write-bitmap-index", &write_bitmap_index,
			 N_("write a bitmap index together with the pack index")),
		OPT_END(),
	};

	check_replace_refs = 0;

	reset_pack_idx_option(&pack_idx_opts);
	git_config(git_pack_config, NULL);

	progress = isatty(2);
	argc = parse_options(argc, argv, prefix, pack_objects_options,
			     pack_usage, 0);

	if (argc) {
		base_name = argv[0];
		argc--;
	}
	if (pack_to_stdout != !base_name || argc)
		usage_with_options(pack_usage, pack_objects_options);

	argv_array_push(&rp, "pack-objects");
	if (thin) {
		use_internal_rev_list = 1;
		argv_array_push(&rp, shallow
				? "--objects-edge-aggressive"
				: "--objects-edge");
	} else
		argv_array_push(&rp, "--objects");

	if (rev_list_all) {
		use_internal_rev_list = 1;
		argv_array_push(&rp, "--all");
	}
	if (rev_list_reflog) {
		use_internal_rev_list = 1;
		argv_array_push(&rp, "--reflog");
	}
	if (rev_list_index) {
		use_internal_rev_list = 1;
		argv_array_push(&rp, "--indexed-objects");
	}
	if (rev_list_unpacked) {
		use_internal_rev_list = 1;
		argv_array_push(&rp, "--unpacked");
	}

	if (!reuse_object)
		reuse_delta = 0;
	if (pack_compression_level == -1)
		pack_compression_level = Z_DEFAULT_COMPRESSION;
	else if (pack_compression_level < 0 || pack_compression_level > Z_BEST_COMPRESSION)
		die("bad pack compression level %d", pack_compression_level);

	if (!delta_search_threads)	/* --threads=0 means autodetect */
		delta_search_threads = online_cpus();

#ifdef NO_PTHREADS
	if (delta_search_threads != 1)
		warning("no threads support, ignoring --threads");
#endif
	if (!pack_to_stdout && !pack_size_limit)
		pack_size_limit = pack_size_limit_cfg;
	if (pack_to_stdout && pack_size_limit)
		die("--max-pack-size cannot be used to build a pack for transfer.");
	if (pack_size_limit && pack_size_limit < 1024*1024) {
		warning("minimum pack size limit is 1 MiB");
		pack_size_limit = 1024*1024;
	}

	if (!pack_to_stdout && thin)
		die("--thin cannot be used to build an indexable pack.");

	if (keep_unreachable && unpack_unreachable)
		die("--keep-unreachable and --unpack-unreachable are incompatible.");
	if (!rev_list_all || !rev_list_reflog || !rev_list_index)
		unpack_unreachable_expiration = 0;

	/*
	 * "soft" reasons not to use bitmaps - for on-disk repack by default we want
	 *
	 * - to produce good pack (with bitmap index not-yet-packed objects are
	 *   packed in suboptimal order).
	 *
	 * - to use more robust pack-generation codepath (avoiding possible
	 *   bugs in bitmap code and possible bitmap index corruption).
	 */
	if (!pack_to_stdout)
		use_bitmap_index_default = 0;

	if (use_bitmap_index < 0)
		use_bitmap_index = use_bitmap_index_default;

	/* "hard" reasons not to use bitmaps; these just won't work at all */
	if (!use_internal_rev_list || (!pack_to_stdout && write_bitmap_index) || is_repository_shallow())
		use_bitmap_index = 0;

	if (pack_to_stdout || !rev_list_all)
		write_bitmap_index = 0;

	if (progress && all_progress_implied)
		progress = 2;

	prepare_packed_git();
	if (ignore_packed_keep) {
		struct packed_git *p;
		for (p = packed_git; p; p = p->next)
			if (p->pack_local && p->pack_keep)
				break;
		if (!p) /* no keep-able packs found */
			ignore_packed_keep = 0;
	}
	if (local) {
		/*
		 * unlike ignore_packed_keep above, we do not want to
		 * unset "local" based on looking at packs, as it
		 * also covers non-local objects
		 */
		struct packed_git *p;
		for (p = packed_git; p; p = p->next) {
			if (!p->pack_local) {
				have_non_local_packs = 1;
				break;
			}
		}
	}

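	/*
	 * Enumeration phase: collect every object to be packed, either from
	 * the list fed to us on stdin or by running the internal rev-list
	 * set up above, before deltifying and writing the pack.
	 */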
ignoring --threads");2994#endif2995if(!pack_to_stdout && !pack_size_limit)2996 pack_size_limit = pack_size_limit_cfg;2997if(pack_to_stdout && pack_size_limit)2998die("--max-pack-size cannot be used to build a pack for transfer.");2999if(pack_size_limit && pack_size_limit <1024*1024) {3000warning("minimum pack size limit is 1 MiB");3001 pack_size_limit =1024*1024;3002}30033004if(!pack_to_stdout && thin)3005die("--thin cannot be used to build an indexable pack.");30063007if(keep_unreachable && unpack_unreachable)3008die("--keep-unreachable and --unpack-unreachable are incompatible.");3009if(!rev_list_all || !rev_list_reflog || !rev_list_index)3010 unpack_unreachable_expiration =0;30113012/*3013 * "soft" reasons not to use bitmaps - for on-disk repack by default we want3014 *3015 * - to produce good pack (with bitmap index not-yet-packed objects are3016 * packed in suboptimal order).3017 *3018 * - to use more robust pack-generation codepath (avoiding possible3019 * bugs in bitmap code and possible bitmap index corruption).3020 */3021if(!pack_to_stdout)3022 use_bitmap_index_default =0;30233024if(use_bitmap_index <0)3025 use_bitmap_index = use_bitmap_index_default;30263027/* "hard" reasons not to use bitmaps; these just won't work at all */3028if(!use_internal_rev_list || (!pack_to_stdout && write_bitmap_index) ||is_repository_shallow())3029 use_bitmap_index =0;30303031if(pack_to_stdout || !rev_list_all)3032 write_bitmap_index =0;30333034if(progress && all_progress_implied)3035 progress =2;30363037prepare_packed_git();3038if(ignore_packed_keep) {3039struct packed_git *p;3040for(p = packed_git; p; p = p->next)3041if(p->pack_local && p->pack_keep)3042break;3043if(!p)/* no keep-able packs found */3044 ignore_packed_keep =0;3045}3046if(local) {3047/*3048 * unlike ignore_packed_keep above, we do not want to3049 * unset "local" based on looking at packs, as it3050 * also covers non-local objects3051 */3052struct packed_git *p;3053for(p = packed_git; p; p = p->next) {3054if(!p->pack_local) {3055 have_non_local_packs =1;3056break;3057}3058}3059}30603061if(progress)3062 progress_state =start_progress(_("Counting objects"),0);3063if(!use_internal_rev_list)3064read_object_list_from_stdin();3065else{3066get_object_list(rp.argc, rp.argv);3067argv_array_clear(&rp);3068}3069cleanup_preferred_base();3070if(include_tag && nr_result)3071for_each_ref(add_ref_tag, NULL);3072stop_progress(&progress_state);30733074if(non_empty && !nr_result)3075return0;3076if(nr_result)3077prepare_pack(window, depth);3078write_pack_file();3079if(progress)3080fprintf(stderr,"Total %"PRIu32" (delta %"PRIu32"),"3081" reused %"PRIu32" (delta %"PRIu32")\n",3082 written, written_delta, reused, reused_delta);3083return0;3084}