1/* 2 * GIT - The information manager from hell 3 * 4 * Copyright (C) Linus Torvalds, 2005 5 * 6 * This handles basic git sha1 object files - packing, unpacking, 7 * creation etc. 8 */ 9#include"cache.h" 10#include"string-list.h" 11#include"lockfile.h" 12#include"delta.h" 13#include"pack.h" 14#include"blob.h" 15#include"commit.h" 16#include"run-command.h" 17#include"tag.h" 18#include"tree.h" 19#include"tree-walk.h" 20#include"refs.h" 21#include"pack-revindex.h" 22#include"sha1-lookup.h" 23#include"bulk-checkin.h" 24#include"streaming.h" 25#include"dir.h" 26#include"mru.h" 27#include"list.h" 28#include"mergesort.h" 29#include"quote.h" 30 31#define SZ_FMT PRIuMAX 32staticinlineuintmax_tsz_fmt(size_t s) {return s; } 33 34const unsigned char null_sha1[20]; 35const struct object_id null_oid; 36const struct object_id empty_tree_oid = { 37 EMPTY_TREE_SHA1_BIN_LITERAL 38}; 39const struct object_id empty_blob_oid = { 40 EMPTY_BLOB_SHA1_BIN_LITERAL 41}; 42 43/* 44 * This is meant to hold a *small* number of objects that you would 45 * want read_sha1_file() to be able to return, but yet you do not want 46 * to write them into the object store (e.g. a browse-only 47 * application). 48 */ 49static struct cached_object { 50unsigned char sha1[20]; 51enum object_type type; 52void*buf; 53unsigned long size; 54} *cached_objects; 55static int cached_object_nr, cached_object_alloc; 56 57static struct cached_object empty_tree = { 58 EMPTY_TREE_SHA1_BIN_LITERAL, 59 OBJ_TREE, 60"", 610 62}; 63 64static struct cached_object *find_cached_object(const unsigned char*sha1) 65{ 66int i; 67struct cached_object *co = cached_objects; 68 69for(i =0; i < cached_object_nr; i++, co++) { 70if(!hashcmp(co->sha1, sha1)) 71return co; 72} 73if(!hashcmp(sha1, empty_tree.sha1)) 74return&empty_tree; 75return NULL; 76} 77 78intmkdir_in_gitdir(const char*path) 79{ 80if(mkdir(path,0777)) { 81int saved_errno = errno; 82struct stat st; 83struct strbuf sb = STRBUF_INIT; 84 85if(errno != EEXIST) 86return-1; 87/* 88 * Are we looking at a path in a symlinked worktree 89 * whose original repository does not yet have it? 90 * e.g. .git/rr-cache pointing at its original 91 * repository in which the user hasn't performed any 92 * conflict resolution yet? 93 */ 94if(lstat(path, &st) || !S_ISLNK(st.st_mode) || 95strbuf_readlink(&sb, path, st.st_size) || 96!is_absolute_path(sb.buf) || 97mkdir(sb.buf,0777)) { 98strbuf_release(&sb); 99 errno = saved_errno; 100return-1; 101} 102strbuf_release(&sb); 103} 104returnadjust_shared_perm(path); 105} 106 107enum scld_error safe_create_leading_directories(char*path) 108{ 109char*next_component = path +offset_1st_component(path); 110enum scld_error ret = SCLD_OK; 111 112while(ret == SCLD_OK && next_component) { 113struct stat st; 114char*slash = next_component, slash_character; 115 116while(*slash && !is_dir_sep(*slash)) 117 slash++; 118 119if(!*slash) 120break; 121 122 next_component = slash +1; 123while(is_dir_sep(*next_component)) 124 next_component++; 125if(!*next_component) 126break; 127 128 slash_character = *slash; 129*slash ='\0'; 130if(!stat(path, &st)) { 131/* path exists */ 132if(!S_ISDIR(st.st_mode)) { 133 errno = ENOTDIR; 134 ret = SCLD_EXISTS; 135} 136}else if(mkdir(path,0777)) { 137if(errno == EEXIST && 138!stat(path, &st) &&S_ISDIR(st.st_mode)) 139;/* somebody created it since we checked */ 140else if(errno == ENOENT) 141/* 142 * Either mkdir() failed because 143 * somebody just pruned the containing 144 * directory, or stat() failed because 145 * the file that was in our way was 146 * just removed. Either way, inform 147 * the caller that it might be worth 148 * trying again: 149 */ 150 ret = SCLD_VANISHED; 151else 152 ret = SCLD_FAILED; 153}else if(adjust_shared_perm(path)) { 154 ret = SCLD_PERMS; 155} 156*slash = slash_character; 157} 158return ret; 159} 160 161enum scld_error safe_create_leading_directories_const(const char*path) 162{ 163int save_errno; 164/* path points to cache entries, so xstrdup before messing with it */ 165char*buf =xstrdup(path); 166enum scld_error result =safe_create_leading_directories(buf); 167 168 save_errno = errno; 169free(buf); 170 errno = save_errno; 171return result; 172} 173 174intraceproof_create_file(const char*path, create_file_fn fn,void*cb) 175{ 176/* 177 * The number of times we will try to remove empty directories 178 * in the way of path. This is only 1 because if another 179 * process is racily creating directories that conflict with 180 * us, we don't want to fight against them. 181 */ 182int remove_directories_remaining =1; 183 184/* 185 * The number of times that we will try to create the 186 * directories containing path. We are willing to attempt this 187 * more than once, because another process could be trying to 188 * clean up empty directories at the same time as we are 189 * trying to create them. 190 */ 191int create_directories_remaining =3; 192 193/* A scratch copy of path, filled lazily if we need it: */ 194struct strbuf path_copy = STRBUF_INIT; 195 196int ret, save_errno; 197 198/* Sanity check: */ 199assert(*path); 200 201retry_fn: 202 ret =fn(path, cb); 203 save_errno = errno; 204if(!ret) 205goto out; 206 207if(errno == EISDIR && remove_directories_remaining-- >0) { 208/* 209 * A directory is in the way. Maybe it is empty; try 210 * to remove it: 211 */ 212if(!path_copy.len) 213strbuf_addstr(&path_copy, path); 214 215if(!remove_dir_recursively(&path_copy, REMOVE_DIR_EMPTY_ONLY)) 216goto retry_fn; 217}else if(errno == ENOENT && create_directories_remaining-- >0) { 218/* 219 * Maybe the containing directory didn't exist, or 220 * maybe it was just deleted by a process that is 221 * racing with us to clean up empty directories. Try 222 * to create it: 223 */ 224enum scld_error scld_result; 225 226if(!path_copy.len) 227strbuf_addstr(&path_copy, path); 228 229do{ 230 scld_result =safe_create_leading_directories(path_copy.buf); 231if(scld_result == SCLD_OK) 232goto retry_fn; 233}while(scld_result == SCLD_VANISHED && create_directories_remaining-- >0); 234} 235 236out: 237strbuf_release(&path_copy); 238 errno = save_errno; 239return ret; 240} 241 242static voidfill_sha1_path(struct strbuf *buf,const unsigned char*sha1) 243{ 244int i; 245for(i =0; i <20; i++) { 246static char hex[] ="0123456789abcdef"; 247unsigned int val = sha1[i]; 248strbuf_addch(buf, hex[val >>4]); 249strbuf_addch(buf, hex[val &0xf]); 250if(!i) 251strbuf_addch(buf,'/'); 252} 253} 254 255const char*sha1_file_name(const unsigned char*sha1) 256{ 257static struct strbuf buf = STRBUF_INIT; 258 259strbuf_reset(&buf); 260strbuf_addf(&buf,"%s/",get_object_directory()); 261 262fill_sha1_path(&buf, sha1); 263return buf.buf; 264} 265 266struct strbuf *alt_scratch_buf(struct alternate_object_database *alt) 267{ 268strbuf_setlen(&alt->scratch, alt->base_len); 269return&alt->scratch; 270} 271 272static const char*alt_sha1_path(struct alternate_object_database *alt, 273const unsigned char*sha1) 274{ 275struct strbuf *buf =alt_scratch_buf(alt); 276fill_sha1_path(buf, sha1); 277return buf->buf; 278} 279 280/* 281 * Return the name of the pack or index file with the specified sha1 282 * in its filename. *base and *name are scratch space that must be 283 * provided by the caller. which should be "pack" or "idx". 284 */ 285static char*sha1_get_pack_name(const unsigned char*sha1, 286struct strbuf *buf, 287const char*which) 288{ 289strbuf_reset(buf); 290strbuf_addf(buf,"%s/pack/pack-%s.%s",get_object_directory(), 291sha1_to_hex(sha1), which); 292return buf->buf; 293} 294 295char*sha1_pack_name(const unsigned char*sha1) 296{ 297static struct strbuf buf = STRBUF_INIT; 298returnsha1_get_pack_name(sha1, &buf,"pack"); 299} 300 301char*sha1_pack_index_name(const unsigned char*sha1) 302{ 303static struct strbuf buf = STRBUF_INIT; 304returnsha1_get_pack_name(sha1, &buf,"idx"); 305} 306 307struct alternate_object_database *alt_odb_list; 308static struct alternate_object_database **alt_odb_tail; 309 310/* 311 * Return non-zero iff the path is usable as an alternate object database. 312 */ 313static intalt_odb_usable(struct strbuf *path,const char*normalized_objdir) 314{ 315struct alternate_object_database *alt; 316 317/* Detect cases where alternate disappeared */ 318if(!is_directory(path->buf)) { 319error("object directory%sdoes not exist; " 320"check .git/objects/info/alternates.", 321 path->buf); 322return0; 323} 324 325/* 326 * Prevent the common mistake of listing the same 327 * thing twice, or object directory itself. 328 */ 329for(alt = alt_odb_list; alt; alt = alt->next) { 330if(!fspathcmp(path->buf, alt->path)) 331return0; 332} 333if(!fspathcmp(path->buf, normalized_objdir)) 334return0; 335 336return1; 337} 338 339/* 340 * Prepare alternate object database registry. 341 * 342 * The variable alt_odb_list points at the list of struct 343 * alternate_object_database. The elements on this list come from 344 * non-empty elements from colon separated ALTERNATE_DB_ENVIRONMENT 345 * environment variable, and $GIT_OBJECT_DIRECTORY/info/alternates, 346 * whose contents is similar to that environment variable but can be 347 * LF separated. Its base points at a statically allocated buffer that 348 * contains "/the/directory/corresponding/to/.git/objects/...", while 349 * its name points just after the slash at the end of ".git/objects/" 350 * in the example above, and has enough space to hold 40-byte hex 351 * SHA1, an extra slash for the first level indirection, and the 352 * terminating NUL. 353 */ 354static intlink_alt_odb_entry(const char*entry,const char*relative_base, 355int depth,const char*normalized_objdir) 356{ 357struct alternate_object_database *ent; 358struct strbuf pathbuf = STRBUF_INIT; 359 360if(!is_absolute_path(entry) && relative_base) { 361strbuf_realpath(&pathbuf, relative_base,1); 362strbuf_addch(&pathbuf,'/'); 363} 364strbuf_addstr(&pathbuf, entry); 365 366if(strbuf_normalize_path(&pathbuf) <0&& relative_base) { 367error("unable to normalize alternate object path:%s", 368 pathbuf.buf); 369strbuf_release(&pathbuf); 370return-1; 371} 372 373/* 374 * The trailing slash after the directory name is given by 375 * this function at the end. Remove duplicates. 376 */ 377while(pathbuf.len && pathbuf.buf[pathbuf.len -1] =='/') 378strbuf_setlen(&pathbuf, pathbuf.len -1); 379 380if(!alt_odb_usable(&pathbuf, normalized_objdir)) { 381strbuf_release(&pathbuf); 382return-1; 383} 384 385 ent =alloc_alt_odb(pathbuf.buf); 386 387/* add the alternate entry */ 388*alt_odb_tail = ent; 389 alt_odb_tail = &(ent->next); 390 ent->next = NULL; 391 392/* recursively add alternates */ 393read_info_alternates(pathbuf.buf, depth +1); 394 395strbuf_release(&pathbuf); 396return0; 397} 398 399static const char*parse_alt_odb_entry(const char*string, 400int sep, 401struct strbuf *out) 402{ 403const char*end; 404 405strbuf_reset(out); 406 407if(*string =='#') { 408/* comment; consume up to next separator */ 409 end =strchrnul(string, sep); 410}else if(*string =='"'&& !unquote_c_style(out, string, &end)) { 411/* 412 * quoted path; unquote_c_style has copied the 413 * data for us and set "end". Broken quoting (e.g., 414 * an entry that doesn't end with a quote) falls 415 * back to the unquoted case below. 416 */ 417}else{ 418/* normal, unquoted path */ 419 end =strchrnul(string, sep); 420strbuf_add(out, string, end - string); 421} 422 423if(*end) 424 end++; 425return end; 426} 427 428static voidlink_alt_odb_entries(const char*alt,int len,int sep, 429const char*relative_base,int depth) 430{ 431struct strbuf objdirbuf = STRBUF_INIT; 432struct strbuf entry = STRBUF_INIT; 433 434if(depth >5) { 435error("%s: ignoring alternate object stores, nesting too deep.", 436 relative_base); 437return; 438} 439 440strbuf_add_absolute_path(&objdirbuf,get_object_directory()); 441if(strbuf_normalize_path(&objdirbuf) <0) 442die("unable to normalize object directory:%s", 443 objdirbuf.buf); 444 445while(*alt) { 446 alt =parse_alt_odb_entry(alt, sep, &entry); 447if(!entry.len) 448continue; 449link_alt_odb_entry(entry.buf, relative_base, depth, objdirbuf.buf); 450} 451strbuf_release(&entry); 452strbuf_release(&objdirbuf); 453} 454 455voidread_info_alternates(const char* relative_base,int depth) 456{ 457char*map; 458size_t mapsz; 459struct stat st; 460char*path; 461int fd; 462 463 path =xstrfmt("%s/info/alternates", relative_base); 464 fd =git_open(path); 465free(path); 466if(fd <0) 467return; 468if(fstat(fd, &st) || (st.st_size ==0)) { 469close(fd); 470return; 471} 472 mapsz =xsize_t(st.st_size); 473 map =xmmap(NULL, mapsz, PROT_READ, MAP_PRIVATE, fd,0); 474close(fd); 475 476link_alt_odb_entries(map, mapsz,'\n', relative_base, depth); 477 478munmap(map, mapsz); 479} 480 481struct alternate_object_database *alloc_alt_odb(const char*dir) 482{ 483struct alternate_object_database *ent; 484 485FLEX_ALLOC_STR(ent, path, dir); 486strbuf_init(&ent->scratch,0); 487strbuf_addf(&ent->scratch,"%s/", dir); 488 ent->base_len = ent->scratch.len; 489 490return ent; 491} 492 493voidadd_to_alternates_file(const char*reference) 494{ 495struct lock_file *lock =xcalloc(1,sizeof(struct lock_file)); 496char*alts =git_pathdup("objects/info/alternates"); 497FILE*in, *out; 498 499hold_lock_file_for_update(lock, alts, LOCK_DIE_ON_ERROR); 500 out =fdopen_lock_file(lock,"w"); 501if(!out) 502die_errno("unable to fdopen alternates lockfile"); 503 504 in =fopen(alts,"r"); 505if(in) { 506struct strbuf line = STRBUF_INIT; 507int found =0; 508 509while(strbuf_getline(&line, in) != EOF) { 510if(!strcmp(reference, line.buf)) { 511 found =1; 512break; 513} 514fprintf_or_die(out,"%s\n", line.buf); 515} 516 517strbuf_release(&line); 518fclose(in); 519 520if(found) { 521rollback_lock_file(lock); 522 lock = NULL; 523} 524} 525else if(errno != ENOENT) 526die_errno("unable to read alternates file"); 527 528if(lock) { 529fprintf_or_die(out,"%s\n", reference); 530if(commit_lock_file(lock)) 531die_errno("unable to move new alternates file into place"); 532if(alt_odb_tail) 533link_alt_odb_entries(reference,strlen(reference),'\n', NULL,0); 534} 535free(alts); 536} 537 538voidadd_to_alternates_memory(const char*reference) 539{ 540/* 541 * Make sure alternates are initialized, or else our entry may be 542 * overwritten when they are. 543 */ 544prepare_alt_odb(); 545 546link_alt_odb_entries(reference,strlen(reference),'\n', NULL,0); 547} 548 549/* 550 * Compute the exact path an alternate is at and returns it. In case of 551 * error NULL is returned and the human readable error is added to `err` 552 * `path` may be relative and should point to $GITDIR. 553 * `err` must not be null. 554 */ 555char*compute_alternate_path(const char*path,struct strbuf *err) 556{ 557char*ref_git = NULL; 558const char*repo, *ref_git_s; 559int seen_error =0; 560 561 ref_git_s =real_path_if_valid(path); 562if(!ref_git_s) { 563 seen_error =1; 564strbuf_addf(err,_("path '%s' does not exist"), path); 565goto out; 566}else 567/* 568 * Beware: read_gitfile(), real_path() and mkpath() 569 * return static buffer 570 */ 571 ref_git =xstrdup(ref_git_s); 572 573 repo =read_gitfile(ref_git); 574if(!repo) 575 repo =read_gitfile(mkpath("%s/.git", ref_git)); 576if(repo) { 577free(ref_git); 578 ref_git =xstrdup(repo); 579} 580 581if(!repo &&is_directory(mkpath("%s/.git/objects", ref_git))) { 582char*ref_git_git =mkpathdup("%s/.git", ref_git); 583free(ref_git); 584 ref_git = ref_git_git; 585}else if(!is_directory(mkpath("%s/objects", ref_git))) { 586struct strbuf sb = STRBUF_INIT; 587 seen_error =1; 588if(get_common_dir(&sb, ref_git)) { 589strbuf_addf(err, 590_("reference repository '%s' as a linked " 591"checkout is not supported yet."), 592 path); 593goto out; 594} 595 596strbuf_addf(err,_("reference repository '%s' is not a " 597"local repository."), path); 598goto out; 599} 600 601if(!access(mkpath("%s/shallow", ref_git), F_OK)) { 602strbuf_addf(err,_("reference repository '%s' is shallow"), 603 path); 604 seen_error =1; 605goto out; 606} 607 608if(!access(mkpath("%s/info/grafts", ref_git), F_OK)) { 609strbuf_addf(err, 610_("reference repository '%s' is grafted"), 611 path); 612 seen_error =1; 613goto out; 614} 615 616out: 617if(seen_error) { 618free(ref_git); 619 ref_git = NULL; 620} 621 622return ref_git; 623} 624 625intforeach_alt_odb(alt_odb_fn fn,void*cb) 626{ 627struct alternate_object_database *ent; 628int r =0; 629 630prepare_alt_odb(); 631for(ent = alt_odb_list; ent; ent = ent->next) { 632 r =fn(ent, cb); 633if(r) 634break; 635} 636return r; 637} 638 639voidprepare_alt_odb(void) 640{ 641const char*alt; 642 643if(alt_odb_tail) 644return; 645 646 alt =getenv(ALTERNATE_DB_ENVIRONMENT); 647if(!alt) alt =""; 648 649 alt_odb_tail = &alt_odb_list; 650link_alt_odb_entries(alt,strlen(alt), PATH_SEP, NULL,0); 651 652read_info_alternates(get_object_directory(),0); 653} 654 655/* Returns 1 if we have successfully freshened the file, 0 otherwise. */ 656static intfreshen_file(const char*fn) 657{ 658struct utimbuf t; 659 t.actime = t.modtime =time(NULL); 660return!utime(fn, &t); 661} 662 663/* 664 * All of the check_and_freshen functions return 1 if the file exists and was 665 * freshened (if freshening was requested), 0 otherwise. If they return 666 * 0, you should not assume that it is safe to skip a write of the object (it 667 * either does not exist on disk, or has a stale mtime and may be subject to 668 * pruning). 669 */ 670intcheck_and_freshen_file(const char*fn,int freshen) 671{ 672if(access(fn, F_OK)) 673return0; 674if(freshen && !freshen_file(fn)) 675return0; 676return1; 677} 678 679static intcheck_and_freshen_local(const unsigned char*sha1,int freshen) 680{ 681returncheck_and_freshen_file(sha1_file_name(sha1), freshen); 682} 683 684static intcheck_and_freshen_nonlocal(const unsigned char*sha1,int freshen) 685{ 686struct alternate_object_database *alt; 687prepare_alt_odb(); 688for(alt = alt_odb_list; alt; alt = alt->next) { 689const char*path =alt_sha1_path(alt, sha1); 690if(check_and_freshen_file(path, freshen)) 691return1; 692} 693return0; 694} 695 696static intcheck_and_freshen(const unsigned char*sha1,int freshen) 697{ 698returncheck_and_freshen_local(sha1, freshen) || 699check_and_freshen_nonlocal(sha1, freshen); 700} 701 702inthas_loose_object_nonlocal(const unsigned char*sha1) 703{ 704returncheck_and_freshen_nonlocal(sha1,0); 705} 706 707static inthas_loose_object(const unsigned char*sha1) 708{ 709returncheck_and_freshen(sha1,0); 710} 711 712static unsigned int pack_used_ctr; 713static unsigned int pack_mmap_calls; 714static unsigned int peak_pack_open_windows; 715static unsigned int pack_open_windows; 716static unsigned int pack_open_fds; 717static unsigned int pack_max_fds; 718static size_t peak_pack_mapped; 719static size_t pack_mapped; 720struct packed_git *packed_git; 721 722static struct mru packed_git_mru_storage; 723struct mru *packed_git_mru = &packed_git_mru_storage; 724 725voidpack_report(void) 726{ 727fprintf(stderr, 728"pack_report: getpagesize() =%10" SZ_FMT "\n" 729"pack_report: core.packedGitWindowSize =%10" SZ_FMT "\n" 730"pack_report: core.packedGitLimit =%10" SZ_FMT "\n", 731sz_fmt(getpagesize()), 732sz_fmt(packed_git_window_size), 733sz_fmt(packed_git_limit)); 734fprintf(stderr, 735"pack_report: pack_used_ctr =%10u\n" 736"pack_report: pack_mmap_calls =%10u\n" 737"pack_report: pack_open_windows =%10u /%10u\n" 738"pack_report: pack_mapped = " 739"%10" SZ_FMT " /%10" SZ_FMT "\n", 740 pack_used_ctr, 741 pack_mmap_calls, 742 pack_open_windows, peak_pack_open_windows, 743sz_fmt(pack_mapped),sz_fmt(peak_pack_mapped)); 744} 745 746/* 747 * Open and mmap the index file at path, perform a couple of 748 * consistency checks, then record its information to p. Return 0 on 749 * success. 750 */ 751static intcheck_packed_git_idx(const char*path,struct packed_git *p) 752{ 753void*idx_map; 754struct pack_idx_header *hdr; 755size_t idx_size; 756uint32_t version, nr, i, *index; 757int fd =git_open(path); 758struct stat st; 759 760if(fd <0) 761return-1; 762if(fstat(fd, &st)) { 763close(fd); 764return-1; 765} 766 idx_size =xsize_t(st.st_size); 767if(idx_size <4*256+20+20) { 768close(fd); 769returnerror("index file%sis too small", path); 770} 771 idx_map =xmmap(NULL, idx_size, PROT_READ, MAP_PRIVATE, fd,0); 772close(fd); 773 774 hdr = idx_map; 775if(hdr->idx_signature ==htonl(PACK_IDX_SIGNATURE)) { 776 version =ntohl(hdr->idx_version); 777if(version <2|| version >2) { 778munmap(idx_map, idx_size); 779returnerror("index file%sis version %"PRIu32 780" and is not supported by this binary" 781" (try upgrading GIT to a newer version)", 782 path, version); 783} 784}else 785 version =1; 786 787 nr =0; 788 index = idx_map; 789if(version >1) 790 index +=2;/* skip index header */ 791for(i =0; i <256; i++) { 792uint32_t n =ntohl(index[i]); 793if(n < nr) { 794munmap(idx_map, idx_size); 795returnerror("non-monotonic index%s", path); 796} 797 nr = n; 798} 799 800if(version ==1) { 801/* 802 * Total size: 803 * - 256 index entries 4 bytes each 804 * - 24-byte entries * nr (20-byte sha1 + 4-byte offset) 805 * - 20-byte SHA1 of the packfile 806 * - 20-byte SHA1 file checksum 807 */ 808if(idx_size !=4*256+ nr *24+20+20) { 809munmap(idx_map, idx_size); 810returnerror("wrong index v1 file size in%s", path); 811} 812}else if(version ==2) { 813/* 814 * Minimum size: 815 * - 8 bytes of header 816 * - 256 index entries 4 bytes each 817 * - 20-byte sha1 entry * nr 818 * - 4-byte crc entry * nr 819 * - 4-byte offset entry * nr 820 * - 20-byte SHA1 of the packfile 821 * - 20-byte SHA1 file checksum 822 * And after the 4-byte offset table might be a 823 * variable sized table containing 8-byte entries 824 * for offsets larger than 2^31. 825 */ 826unsigned long min_size =8+4*256+ nr*(20+4+4) +20+20; 827unsigned long max_size = min_size; 828if(nr) 829 max_size += (nr -1)*8; 830if(idx_size < min_size || idx_size > max_size) { 831munmap(idx_map, idx_size); 832returnerror("wrong index v2 file size in%s", path); 833} 834if(idx_size != min_size && 835/* 836 * make sure we can deal with large pack offsets. 837 * 31-bit signed offset won't be enough, neither 838 * 32-bit unsigned one will be. 839 */ 840(sizeof(off_t) <=4)) { 841munmap(idx_map, idx_size); 842returnerror("pack too large for current definition of off_t in%s", path); 843} 844} 845 846 p->index_version = version; 847 p->index_data = idx_map; 848 p->index_size = idx_size; 849 p->num_objects = nr; 850return0; 851} 852 853intopen_pack_index(struct packed_git *p) 854{ 855char*idx_name; 856size_t len; 857int ret; 858 859if(p->index_data) 860return0; 861 862if(!strip_suffix(p->pack_name,".pack", &len)) 863die("BUG: pack_name does not end in .pack"); 864 idx_name =xstrfmt("%.*s.idx", (int)len, p->pack_name); 865 ret =check_packed_git_idx(idx_name, p); 866free(idx_name); 867return ret; 868} 869 870static voidscan_windows(struct packed_git *p, 871struct packed_git **lru_p, 872struct pack_window **lru_w, 873struct pack_window **lru_l) 874{ 875struct pack_window *w, *w_l; 876 877for(w_l = NULL, w = p->windows; w; w = w->next) { 878if(!w->inuse_cnt) { 879if(!*lru_w || w->last_used < (*lru_w)->last_used) { 880*lru_p = p; 881*lru_w = w; 882*lru_l = w_l; 883} 884} 885 w_l = w; 886} 887} 888 889static intunuse_one_window(struct packed_git *current) 890{ 891struct packed_git *p, *lru_p = NULL; 892struct pack_window *lru_w = NULL, *lru_l = NULL; 893 894if(current) 895scan_windows(current, &lru_p, &lru_w, &lru_l); 896for(p = packed_git; p; p = p->next) 897scan_windows(p, &lru_p, &lru_w, &lru_l); 898if(lru_p) { 899munmap(lru_w->base, lru_w->len); 900 pack_mapped -= lru_w->len; 901if(lru_l) 902 lru_l->next = lru_w->next; 903else 904 lru_p->windows = lru_w->next; 905free(lru_w); 906 pack_open_windows--; 907return1; 908} 909return0; 910} 911 912voidrelease_pack_memory(size_t need) 913{ 914size_t cur = pack_mapped; 915while(need >= (cur - pack_mapped) &&unuse_one_window(NULL)) 916;/* nothing */ 917} 918 919static voidmmap_limit_check(size_t length) 920{ 921static size_t limit =0; 922if(!limit) { 923 limit =git_env_ulong("GIT_MMAP_LIMIT",0); 924if(!limit) 925 limit = SIZE_MAX; 926} 927if(length > limit) 928die("attempting to mmap %"PRIuMAX" over limit %"PRIuMAX, 929(uintmax_t)length, (uintmax_t)limit); 930} 931 932void*xmmap_gently(void*start,size_t length, 933int prot,int flags,int fd, off_t offset) 934{ 935void*ret; 936 937mmap_limit_check(length); 938 ret =mmap(start, length, prot, flags, fd, offset); 939if(ret == MAP_FAILED) { 940if(!length) 941return NULL; 942release_pack_memory(length); 943 ret =mmap(start, length, prot, flags, fd, offset); 944} 945return ret; 946} 947 948void*xmmap(void*start,size_t length, 949int prot,int flags,int fd, off_t offset) 950{ 951void*ret =xmmap_gently(start, length, prot, flags, fd, offset); 952if(ret == MAP_FAILED) 953die_errno("mmap failed"); 954return ret; 955} 956 957voidclose_pack_windows(struct packed_git *p) 958{ 959while(p->windows) { 960struct pack_window *w = p->windows; 961 962if(w->inuse_cnt) 963die("pack '%s' still has open windows to it", 964 p->pack_name); 965munmap(w->base, w->len); 966 pack_mapped -= w->len; 967 pack_open_windows--; 968 p->windows = w->next; 969free(w); 970} 971} 972 973static intclose_pack_fd(struct packed_git *p) 974{ 975if(p->pack_fd <0) 976return0; 977 978close(p->pack_fd); 979 pack_open_fds--; 980 p->pack_fd = -1; 981 982return1; 983} 984 985static voidclose_pack(struct packed_git *p) 986{ 987close_pack_windows(p); 988close_pack_fd(p); 989close_pack_index(p); 990} 991 992voidclose_all_packs(void) 993{ 994struct packed_git *p; 995 996for(p = packed_git; p; p = p->next) 997if(p->do_not_close) 998die("BUG: want to close pack marked 'do-not-close'"); 999else1000close_pack(p);1001}100210031004/*1005 * The LRU pack is the one with the oldest MRU window, preferring packs1006 * with no used windows, or the oldest mtime if it has no windows allocated.1007 */1008static voidfind_lru_pack(struct packed_git *p,struct packed_git **lru_p,struct pack_window **mru_w,int*accept_windows_inuse)1009{1010struct pack_window *w, *this_mru_w;1011int has_windows_inuse =0;10121013/*1014 * Reject this pack if it has windows and the previously selected1015 * one does not. If this pack does not have windows, reject1016 * it if the pack file is newer than the previously selected one.1017 */1018if(*lru_p && !*mru_w && (p->windows || p->mtime > (*lru_p)->mtime))1019return;10201021for(w = this_mru_w = p->windows; w; w = w->next) {1022/*1023 * Reject this pack if any of its windows are in use,1024 * but the previously selected pack did not have any1025 * inuse windows. Otherwise, record that this pack1026 * has windows in use.1027 */1028if(w->inuse_cnt) {1029if(*accept_windows_inuse)1030 has_windows_inuse =1;1031else1032return;1033}10341035if(w->last_used > this_mru_w->last_used)1036 this_mru_w = w;10371038/*1039 * Reject this pack if it has windows that have been1040 * used more recently than the previously selected pack.1041 * If the previously selected pack had windows inuse and1042 * we have not encountered a window in this pack that is1043 * inuse, skip this check since we prefer a pack with no1044 * inuse windows to one that has inuse windows.1045 */1046if(*mru_w && *accept_windows_inuse == has_windows_inuse &&1047 this_mru_w->last_used > (*mru_w)->last_used)1048return;1049}10501051/*1052 * Select this pack.1053 */1054*mru_w = this_mru_w;1055*lru_p = p;1056*accept_windows_inuse = has_windows_inuse;1057}10581059static intclose_one_pack(void)1060{1061struct packed_git *p, *lru_p = NULL;1062struct pack_window *mru_w = NULL;1063int accept_windows_inuse =1;10641065for(p = packed_git; p; p = p->next) {1066if(p->pack_fd == -1)1067continue;1068find_lru_pack(p, &lru_p, &mru_w, &accept_windows_inuse);1069}10701071if(lru_p)1072returnclose_pack_fd(lru_p);10731074return0;1075}10761077voidunuse_pack(struct pack_window **w_cursor)1078{1079struct pack_window *w = *w_cursor;1080if(w) {1081 w->inuse_cnt--;1082*w_cursor = NULL;1083}1084}10851086voidclose_pack_index(struct packed_git *p)1087{1088if(p->index_data) {1089munmap((void*)p->index_data, p->index_size);1090 p->index_data = NULL;1091}1092}10931094static unsigned intget_max_fd_limit(void)1095{1096#ifdef RLIMIT_NOFILE1097{1098struct rlimit lim;10991100if(!getrlimit(RLIMIT_NOFILE, &lim))1101return lim.rlim_cur;1102}1103#endif11041105#ifdef _SC_OPEN_MAX1106{1107long open_max =sysconf(_SC_OPEN_MAX);1108if(0< open_max)1109return open_max;1110/*1111 * Otherwise, we got -1 for one of the two1112 * reasons:1113 *1114 * (1) sysconf() did not understand _SC_OPEN_MAX1115 * and signaled an error with -1; or1116 * (2) sysconf() said there is no limit.1117 *1118 * We _could_ clear errno before calling sysconf() to1119 * tell these two cases apart and return a huge number1120 * in the latter case to let the caller cap it to a1121 * value that is not so selfish, but letting the1122 * fallback OPEN_MAX codepath take care of these cases1123 * is a lot simpler.1124 */1125}1126#endif11271128#ifdef OPEN_MAX1129return OPEN_MAX;1130#else1131return1;/* see the caller ;-) */1132#endif1133}11341135/*1136 * Do not call this directly as this leaks p->pack_fd on error return;1137 * call open_packed_git() instead.1138 */1139static intopen_packed_git_1(struct packed_git *p)1140{1141struct stat st;1142struct pack_header hdr;1143unsigned char sha1[20];1144unsigned char*idx_sha1;1145long fd_flag;11461147if(!p->index_data &&open_pack_index(p))1148returnerror("packfile%sindex unavailable", p->pack_name);11491150if(!pack_max_fds) {1151unsigned int max_fds =get_max_fd_limit();11521153/* Save 3 for stdin/stdout/stderr, 22 for work */1154if(25< max_fds)1155 pack_max_fds = max_fds -25;1156else1157 pack_max_fds =1;1158}11591160while(pack_max_fds <= pack_open_fds &&close_one_pack())1161;/* nothing */11621163 p->pack_fd =git_open(p->pack_name);1164if(p->pack_fd <0||fstat(p->pack_fd, &st))1165return-1;1166 pack_open_fds++;11671168/* If we created the struct before we had the pack we lack size. */1169if(!p->pack_size) {1170if(!S_ISREG(st.st_mode))1171returnerror("packfile%snot a regular file", p->pack_name);1172 p->pack_size = st.st_size;1173}else if(p->pack_size != st.st_size)1174returnerror("packfile%ssize changed", p->pack_name);11751176/* We leave these file descriptors open with sliding mmap;1177 * there is no point keeping them open across exec(), though.1178 */1179 fd_flag =fcntl(p->pack_fd, F_GETFD,0);1180if(fd_flag <0)1181returnerror("cannot determine file descriptor flags");1182 fd_flag |= FD_CLOEXEC;1183if(fcntl(p->pack_fd, F_SETFD, fd_flag) == -1)1184returnerror("cannot set FD_CLOEXEC");11851186/* Verify we recognize this pack file format. */1187if(read_in_full(p->pack_fd, &hdr,sizeof(hdr)) !=sizeof(hdr))1188returnerror("file%sis far too short to be a packfile", p->pack_name);1189if(hdr.hdr_signature !=htonl(PACK_SIGNATURE))1190returnerror("file%sis not a GIT packfile", p->pack_name);1191if(!pack_version_ok(hdr.hdr_version))1192returnerror("packfile%sis version %"PRIu32" and not"1193" supported (try upgrading GIT to a newer version)",1194 p->pack_name,ntohl(hdr.hdr_version));11951196/* Verify the pack matches its index. */1197if(p->num_objects !=ntohl(hdr.hdr_entries))1198returnerror("packfile%sclaims to have %"PRIu32" objects"1199" while index indicates %"PRIu32" objects",1200 p->pack_name,ntohl(hdr.hdr_entries),1201 p->num_objects);1202if(lseek(p->pack_fd, p->pack_size -sizeof(sha1), SEEK_SET) == -1)1203returnerror("end of packfile%sis unavailable", p->pack_name);1204if(read_in_full(p->pack_fd, sha1,sizeof(sha1)) !=sizeof(sha1))1205returnerror("packfile%ssignature is unavailable", p->pack_name);1206 idx_sha1 = ((unsigned char*)p->index_data) + p->index_size -40;1207if(hashcmp(sha1, idx_sha1))1208returnerror("packfile%sdoes not match index", p->pack_name);1209return0;1210}12111212static intopen_packed_git(struct packed_git *p)1213{1214if(!open_packed_git_1(p))1215return0;1216close_pack_fd(p);1217return-1;1218}12191220static intin_window(struct pack_window *win, off_t offset)1221{1222/* We must promise at least 20 bytes (one hash) after the1223 * offset is available from this window, otherwise the offset1224 * is not actually in this window and a different window (which1225 * has that one hash excess) must be used. This is to support1226 * the object header and delta base parsing routines below.1227 */1228 off_t win_off = win->offset;1229return win_off <= offset1230&& (offset +20) <= (win_off + win->len);1231}12321233unsigned char*use_pack(struct packed_git *p,1234struct pack_window **w_cursor,1235 off_t offset,1236unsigned long*left)1237{1238struct pack_window *win = *w_cursor;12391240/* Since packfiles end in a hash of their content and it's1241 * pointless to ask for an offset into the middle of that1242 * hash, and the in_window function above wouldn't match1243 * don't allow an offset too close to the end of the file.1244 */1245if(!p->pack_size && p->pack_fd == -1&&open_packed_git(p))1246die("packfile%scannot be accessed", p->pack_name);1247if(offset > (p->pack_size -20))1248die("offset beyond end of packfile (truncated pack?)");1249if(offset <0)1250die(_("offset before end of packfile (broken .idx?)"));12511252if(!win || !in_window(win, offset)) {1253if(win)1254 win->inuse_cnt--;1255for(win = p->windows; win; win = win->next) {1256if(in_window(win, offset))1257break;1258}1259if(!win) {1260size_t window_align = packed_git_window_size /2;1261 off_t len;12621263if(p->pack_fd == -1&&open_packed_git(p))1264die("packfile%scannot be accessed", p->pack_name);12651266 win =xcalloc(1,sizeof(*win));1267 win->offset = (offset / window_align) * window_align;1268 len = p->pack_size - win->offset;1269if(len > packed_git_window_size)1270 len = packed_git_window_size;1271 win->len = (size_t)len;1272 pack_mapped += win->len;1273while(packed_git_limit < pack_mapped1274&&unuse_one_window(p))1275;/* nothing */1276 win->base =xmmap(NULL, win->len,1277 PROT_READ, MAP_PRIVATE,1278 p->pack_fd, win->offset);1279if(win->base == MAP_FAILED)1280die_errno("packfile%scannot be mapped",1281 p->pack_name);1282if(!win->offset && win->len == p->pack_size1283&& !p->do_not_close)1284close_pack_fd(p);1285 pack_mmap_calls++;1286 pack_open_windows++;1287if(pack_mapped > peak_pack_mapped)1288 peak_pack_mapped = pack_mapped;1289if(pack_open_windows > peak_pack_open_windows)1290 peak_pack_open_windows = pack_open_windows;1291 win->next = p->windows;1292 p->windows = win;1293}1294}1295if(win != *w_cursor) {1296 win->last_used = pack_used_ctr++;1297 win->inuse_cnt++;1298*w_cursor = win;1299}1300 offset -= win->offset;1301if(left)1302*left = win->len -xsize_t(offset);1303return win->base + offset;1304}13051306static struct packed_git *alloc_packed_git(int extra)1307{1308struct packed_git *p =xmalloc(st_add(sizeof(*p), extra));1309memset(p,0,sizeof(*p));1310 p->pack_fd = -1;1311return p;1312}13131314static voidtry_to_free_pack_memory(size_t size)1315{1316release_pack_memory(size);1317}13181319struct packed_git *add_packed_git(const char*path,size_t path_len,int local)1320{1321static int have_set_try_to_free_routine;1322struct stat st;1323size_t alloc;1324struct packed_git *p;13251326if(!have_set_try_to_free_routine) {1327 have_set_try_to_free_routine =1;1328set_try_to_free_routine(try_to_free_pack_memory);1329}13301331/*1332 * Make sure a corresponding .pack file exists and that1333 * the index looks sane.1334 */1335if(!strip_suffix_mem(path, &path_len,".idx"))1336return NULL;13371338/*1339 * ".pack" is long enough to hold any suffix we're adding (and1340 * the use xsnprintf double-checks that)1341 */1342 alloc =st_add3(path_len,strlen(".pack"),1);1343 p =alloc_packed_git(alloc);1344memcpy(p->pack_name, path, path_len);13451346xsnprintf(p->pack_name + path_len, alloc - path_len,".keep");1347if(!access(p->pack_name, F_OK))1348 p->pack_keep =1;13491350xsnprintf(p->pack_name + path_len, alloc - path_len,".pack");1351if(stat(p->pack_name, &st) || !S_ISREG(st.st_mode)) {1352free(p);1353return NULL;1354}13551356/* ok, it looks sane as far as we can check without1357 * actually mapping the pack file.1358 */1359 p->pack_size = st.st_size;1360 p->pack_local = local;1361 p->mtime = st.st_mtime;1362if(path_len <40||get_sha1_hex(path + path_len -40, p->sha1))1363hashclr(p->sha1);1364return p;1365}13661367struct packed_git *parse_pack_index(unsigned char*sha1,const char*idx_path)1368{1369const char*path =sha1_pack_name(sha1);1370size_t alloc =st_add(strlen(path),1);1371struct packed_git *p =alloc_packed_git(alloc);13721373memcpy(p->pack_name, path, alloc);/* includes NUL */1374hashcpy(p->sha1, sha1);1375if(check_packed_git_idx(idx_path, p)) {1376free(p);1377return NULL;1378}13791380return p;1381}13821383voidinstall_packed_git(struct packed_git *pack)1384{1385if(pack->pack_fd != -1)1386 pack_open_fds++;13871388 pack->next = packed_git;1389 packed_git = pack;1390}13911392void(*report_garbage)(unsigned seen_bits,const char*path);13931394static voidreport_helper(const struct string_list *list,1395int seen_bits,int first,int last)1396{1397if(seen_bits == (PACKDIR_FILE_PACK|PACKDIR_FILE_IDX))1398return;13991400for(; first < last; first++)1401report_garbage(seen_bits, list->items[first].string);1402}14031404static voidreport_pack_garbage(struct string_list *list)1405{1406int i, baselen = -1, first =0, seen_bits =0;14071408if(!report_garbage)1409return;14101411string_list_sort(list);14121413for(i =0; i < list->nr; i++) {1414const char*path = list->items[i].string;1415if(baselen != -1&&1416strncmp(path, list->items[first].string, baselen)) {1417report_helper(list, seen_bits, first, i);1418 baselen = -1;1419 seen_bits =0;1420}1421if(baselen == -1) {1422const char*dot =strrchr(path,'.');1423if(!dot) {1424report_garbage(PACKDIR_FILE_GARBAGE, path);1425continue;1426}1427 baselen = dot - path +1;1428 first = i;1429}1430if(!strcmp(path + baselen,"pack"))1431 seen_bits |=1;1432else if(!strcmp(path + baselen,"idx"))1433 seen_bits |=2;1434}1435report_helper(list, seen_bits, first, list->nr);1436}14371438static voidprepare_packed_git_one(char*objdir,int local)1439{1440struct strbuf path = STRBUF_INIT;1441size_t dirnamelen;1442DIR*dir;1443struct dirent *de;1444struct string_list garbage = STRING_LIST_INIT_DUP;14451446strbuf_addstr(&path, objdir);1447strbuf_addstr(&path,"/pack");1448 dir =opendir(path.buf);1449if(!dir) {1450if(errno != ENOENT)1451error_errno("unable to open object pack directory:%s",1452 path.buf);1453strbuf_release(&path);1454return;1455}1456strbuf_addch(&path,'/');1457 dirnamelen = path.len;1458while((de =readdir(dir)) != NULL) {1459struct packed_git *p;1460size_t base_len;14611462if(is_dot_or_dotdot(de->d_name))1463continue;14641465strbuf_setlen(&path, dirnamelen);1466strbuf_addstr(&path, de->d_name);14671468 base_len = path.len;1469if(strip_suffix_mem(path.buf, &base_len,".idx")) {1470/* Don't reopen a pack we already have. */1471for(p = packed_git; p; p = p->next) {1472size_t len;1473if(strip_suffix(p->pack_name,".pack", &len) &&1474 len == base_len &&1475!memcmp(p->pack_name, path.buf, len))1476break;1477}1478if(p == NULL &&1479/*1480 * See if it really is a valid .idx file with1481 * corresponding .pack file that we can map.1482 */1483(p =add_packed_git(path.buf, path.len, local)) != NULL)1484install_packed_git(p);1485}14861487if(!report_garbage)1488continue;14891490if(ends_with(de->d_name,".idx") ||1491ends_with(de->d_name,".pack") ||1492ends_with(de->d_name,".bitmap") ||1493ends_with(de->d_name,".keep"))1494string_list_append(&garbage, path.buf);1495else1496report_garbage(PACKDIR_FILE_GARBAGE, path.buf);1497}1498closedir(dir);1499report_pack_garbage(&garbage);1500string_list_clear(&garbage,0);1501strbuf_release(&path);1502}15031504static int approximate_object_count_valid;15051506/*1507 * Give a fast, rough count of the number of objects in the repository. This1508 * ignores loose objects completely. If you have a lot of them, then either1509 * you should repack because your performance will be awful, or they are1510 * all unreachable objects about to be pruned, in which case they're not really1511 * interesting as a measure of repo size in the first place.1512 */1513unsigned longapproximate_object_count(void)1514{1515static unsigned long count;1516if(!approximate_object_count_valid) {1517struct packed_git *p;15181519prepare_packed_git();1520 count =0;1521for(p = packed_git; p; p = p->next) {1522if(open_pack_index(p))1523continue;1524 count += p->num_objects;1525}1526}1527return count;1528}15291530static void*get_next_packed_git(const void*p)1531{1532return((const struct packed_git *)p)->next;1533}15341535static voidset_next_packed_git(void*p,void*next)1536{1537((struct packed_git *)p)->next = next;1538}15391540static intsort_pack(const void*a_,const void*b_)1541{1542const struct packed_git *a = a_;1543const struct packed_git *b = b_;1544int st;15451546/*1547 * Local packs tend to contain objects specific to our1548 * variant of the project than remote ones. In addition,1549 * remote ones could be on a network mounted filesystem.1550 * Favor local ones for these reasons.1551 */1552 st = a->pack_local - b->pack_local;1553if(st)1554return-st;15551556/*1557 * Younger packs tend to contain more recent objects,1558 * and more recent objects tend to get accessed more1559 * often.1560 */1561if(a->mtime < b->mtime)1562return1;1563else if(a->mtime == b->mtime)1564return0;1565return-1;1566}15671568static voidrearrange_packed_git(void)1569{1570 packed_git =llist_mergesort(packed_git, get_next_packed_git,1571 set_next_packed_git, sort_pack);1572}15731574static voidprepare_packed_git_mru(void)1575{1576struct packed_git *p;15771578mru_clear(packed_git_mru);1579for(p = packed_git; p; p = p->next)1580mru_append(packed_git_mru, p);1581}15821583static int prepare_packed_git_run_once =0;1584voidprepare_packed_git(void)1585{1586struct alternate_object_database *alt;15871588if(prepare_packed_git_run_once)1589return;1590prepare_packed_git_one(get_object_directory(),1);1591prepare_alt_odb();1592for(alt = alt_odb_list; alt; alt = alt->next)1593prepare_packed_git_one(alt->path,0);1594rearrange_packed_git();1595prepare_packed_git_mru();1596 prepare_packed_git_run_once =1;1597}15981599voidreprepare_packed_git(void)1600{1601 approximate_object_count_valid =0;1602 prepare_packed_git_run_once =0;1603prepare_packed_git();1604}16051606static voidmark_bad_packed_object(struct packed_git *p,1607const unsigned char*sha1)1608{1609unsigned i;1610for(i =0; i < p->num_bad_objects; i++)1611if(!hashcmp(sha1, p->bad_object_sha1 + GIT_SHA1_RAWSZ * i))1612return;1613 p->bad_object_sha1 =xrealloc(p->bad_object_sha1,1614st_mult(GIT_SHA1_RAWSZ,1615st_add(p->num_bad_objects,1)));1616hashcpy(p->bad_object_sha1 + GIT_SHA1_RAWSZ * p->num_bad_objects, sha1);1617 p->num_bad_objects++;1618}16191620static const struct packed_git *has_packed_and_bad(const unsigned char*sha1)1621{1622struct packed_git *p;1623unsigned i;16241625for(p = packed_git; p; p = p->next)1626for(i =0; i < p->num_bad_objects; i++)1627if(!hashcmp(sha1, p->bad_object_sha1 +20* i))1628return p;1629return NULL;1630}16311632/*1633 * With an in-core object data in "map", rehash it to make sure the1634 * object name actually matches "sha1" to detect object corruption.1635 * With "map" == NULL, try reading the object named with "sha1" using1636 * the streaming interface and rehash it to do the same.1637 */1638intcheck_sha1_signature(const unsigned char*sha1,void*map,1639unsigned long size,const char*type)1640{1641unsigned char real_sha1[20];1642enum object_type obj_type;1643struct git_istream *st;1644 git_SHA_CTX c;1645char hdr[32];1646int hdrlen;16471648if(map) {1649hash_sha1_file(map, size, type, real_sha1);1650returnhashcmp(sha1, real_sha1) ? -1:0;1651}16521653 st =open_istream(sha1, &obj_type, &size, NULL);1654if(!st)1655return-1;16561657/* Generate the header */1658 hdrlen =xsnprintf(hdr,sizeof(hdr),"%s %lu",typename(obj_type), size) +1;16591660/* Sha1.. */1661git_SHA1_Init(&c);1662git_SHA1_Update(&c, hdr, hdrlen);1663for(;;) {1664char buf[1024*16];1665 ssize_t readlen =read_istream(st, buf,sizeof(buf));16661667if(readlen <0) {1668close_istream(st);1669return-1;1670}1671if(!readlen)1672break;1673git_SHA1_Update(&c, buf, readlen);1674}1675git_SHA1_Final(real_sha1, &c);1676close_istream(st);1677returnhashcmp(sha1, real_sha1) ? -1:0;1678}16791680intgit_open_cloexec(const char*name,int flags)1681{1682int fd;1683static int o_cloexec = O_CLOEXEC;16841685 fd =open(name, flags | o_cloexec);1686if((o_cloexec & O_CLOEXEC) && fd <0&& errno == EINVAL) {1687/* Try again w/o O_CLOEXEC: the kernel might not support it */1688 o_cloexec &= ~O_CLOEXEC;1689 fd =open(name, flags | o_cloexec);1690}16911692#if defined(F_GETFL) && defined(F_SETFL) && defined(FD_CLOEXEC)1693{1694static int fd_cloexec = FD_CLOEXEC;16951696if(!o_cloexec &&0<= fd && fd_cloexec) {1697/* Opened w/o O_CLOEXEC? try with fcntl(2) to add it */1698int flags =fcntl(fd, F_GETFL);1699if(fcntl(fd, F_SETFL, flags | fd_cloexec))1700 fd_cloexec =0;1701}1702}1703#endif1704return fd;1705}17061707/*1708 * Find "sha1" as a loose object in the local repository or in an alternate.1709 * Returns 0 on success, negative on failure.1710 *1711 * The "path" out-parameter will give the path of the object we found (if any).1712 * Note that it may point to static storage and is only valid until another1713 * call to sha1_file_name(), etc.1714 */1715static intstat_sha1_file(const unsigned char*sha1,struct stat *st,1716const char**path)1717{1718struct alternate_object_database *alt;17191720*path =sha1_file_name(sha1);1721if(!lstat(*path, st))1722return0;17231724prepare_alt_odb();1725 errno = ENOENT;1726for(alt = alt_odb_list; alt; alt = alt->next) {1727*path =alt_sha1_path(alt, sha1);1728if(!lstat(*path, st))1729return0;1730}17311732return-1;1733}17341735/*1736 * Like stat_sha1_file(), but actually open the object and return the1737 * descriptor. See the caveats on the "path" parameter above.1738 */1739static intopen_sha1_file(const unsigned char*sha1,const char**path)1740{1741int fd;1742struct alternate_object_database *alt;1743int most_interesting_errno;17441745*path =sha1_file_name(sha1);1746 fd =git_open(*path);1747if(fd >=0)1748return fd;1749 most_interesting_errno = errno;17501751prepare_alt_odb();1752for(alt = alt_odb_list; alt; alt = alt->next) {1753*path =alt_sha1_path(alt, sha1);1754 fd =git_open(*path);1755if(fd >=0)1756return fd;1757if(most_interesting_errno == ENOENT)1758 most_interesting_errno = errno;1759}1760 errno = most_interesting_errno;1761return-1;1762}17631764/*1765 * Map the loose object at "path" if it is not NULL, or the path found by1766 * searching for a loose object named "sha1".1767 */1768static void*map_sha1_file_1(const char*path,1769const unsigned char*sha1,1770unsigned long*size)1771{1772void*map;1773int fd;17741775if(path)1776 fd =git_open(path);1777else1778 fd =open_sha1_file(sha1, &path);1779 map = NULL;1780if(fd >=0) {1781struct stat st;17821783if(!fstat(fd, &st)) {1784*size =xsize_t(st.st_size);1785if(!*size) {1786/* mmap() is forbidden on empty files */1787error("object file%sis empty", path);1788return NULL;1789}1790 map =xmmap(NULL, *size, PROT_READ, MAP_PRIVATE, fd,0);1791}1792close(fd);1793}1794return map;1795}17961797void*map_sha1_file(const unsigned char*sha1,unsigned long*size)1798{1799returnmap_sha1_file_1(NULL, sha1, size);1800}18011802unsigned longunpack_object_header_buffer(const unsigned char*buf,1803unsigned long len,enum object_type *type,unsigned long*sizep)1804{1805unsigned shift;1806unsigned long size, c;1807unsigned long used =0;18081809 c = buf[used++];1810*type = (c >>4) &7;1811 size = c &15;1812 shift =4;1813while(c &0x80) {1814if(len <= used ||bitsizeof(long) <= shift) {1815error("bad object header");1816 size = used =0;1817break;1818}1819 c = buf[used++];1820 size += (c &0x7f) << shift;1821 shift +=7;1822}1823*sizep = size;1824return used;1825}18261827static intunpack_sha1_short_header(git_zstream *stream,1828unsigned char*map,unsigned long mapsize,1829void*buffer,unsigned long bufsiz)1830{1831/* Get the data stream */1832memset(stream,0,sizeof(*stream));1833 stream->next_in = map;1834 stream->avail_in = mapsize;1835 stream->next_out = buffer;1836 stream->avail_out = bufsiz;18371838git_inflate_init(stream);1839returngit_inflate(stream,0);1840}18411842intunpack_sha1_header(git_zstream *stream,1843unsigned char*map,unsigned long mapsize,1844void*buffer,unsigned long bufsiz)1845{1846int status =unpack_sha1_short_header(stream, map, mapsize,1847 buffer, bufsiz);18481849if(status < Z_OK)1850return status;18511852/* Make sure we have the terminating NUL */1853if(!memchr(buffer,'\0', stream->next_out - (unsigned char*)buffer))1854return-1;1855return0;1856}18571858static intunpack_sha1_header_to_strbuf(git_zstream *stream,unsigned char*map,1859unsigned long mapsize,void*buffer,1860unsigned long bufsiz,struct strbuf *header)1861{1862int status;18631864 status =unpack_sha1_short_header(stream, map, mapsize, buffer, bufsiz);1865if(status < Z_OK)1866return-1;18671868/*1869 * Check if entire header is unpacked in the first iteration.1870 */1871if(memchr(buffer,'\0', stream->next_out - (unsigned char*)buffer))1872return0;18731874/*1875 * buffer[0..bufsiz] was not large enough. Copy the partial1876 * result out to header, and then append the result of further1877 * reading the stream.1878 */1879strbuf_add(header, buffer, stream->next_out - (unsigned char*)buffer);1880 stream->next_out = buffer;1881 stream->avail_out = bufsiz;18821883do{1884 status =git_inflate(stream,0);1885strbuf_add(header, buffer, stream->next_out - (unsigned char*)buffer);1886if(memchr(buffer,'\0', stream->next_out - (unsigned char*)buffer))1887return0;1888 stream->next_out = buffer;1889 stream->avail_out = bufsiz;1890}while(status != Z_STREAM_END);1891return-1;1892}18931894static void*unpack_sha1_rest(git_zstream *stream,void*buffer,unsigned long size,const unsigned char*sha1)1895{1896int bytes =strlen(buffer) +1;1897unsigned char*buf =xmallocz(size);1898unsigned long n;1899int status = Z_OK;19001901 n = stream->total_out - bytes;1902if(n > size)1903 n = size;1904memcpy(buf, (char*) buffer + bytes, n);1905 bytes = n;1906if(bytes <= size) {1907/*1908 * The above condition must be (bytes <= size), not1909 * (bytes < size). In other words, even though we1910 * expect no more output and set avail_out to zero,1911 * the input zlib stream may have bytes that express1912 * "this concludes the stream", and we *do* want to1913 * eat that input.1914 *1915 * Otherwise we would not be able to test that we1916 * consumed all the input to reach the expected size;1917 * we also want to check that zlib tells us that all1918 * went well with status == Z_STREAM_END at the end.1919 */1920 stream->next_out = buf + bytes;1921 stream->avail_out = size - bytes;1922while(status == Z_OK)1923 status =git_inflate(stream, Z_FINISH);1924}1925if(status == Z_STREAM_END && !stream->avail_in) {1926git_inflate_end(stream);1927return buf;1928}19291930if(status <0)1931error("corrupt loose object '%s'",sha1_to_hex(sha1));1932else if(stream->avail_in)1933error("garbage at end of loose object '%s'",1934sha1_to_hex(sha1));1935free(buf);1936return NULL;1937}19381939/*1940 * We used to just use "sscanf()", but that's actually way1941 * too permissive for what we want to check. So do an anal1942 * object header parse by hand.1943 */1944static intparse_sha1_header_extended(const char*hdr,struct object_info *oi,1945unsigned int flags)1946{1947const char*type_buf = hdr;1948unsigned long size;1949int type, type_len =0;19501951/*1952 * The type can be of any size but is followed by1953 * a space.1954 */1955for(;;) {1956char c = *hdr++;1957if(!c)1958return-1;1959if(c ==' ')1960break;1961 type_len++;1962}19631964 type =type_from_string_gently(type_buf, type_len,1);1965if(oi->typename)1966strbuf_add(oi->typename, type_buf, type_len);1967/*1968 * Set type to 0 if its an unknown object and1969 * we're obtaining the type using '--allow-unknown-type'1970 * option.1971 */1972if((flags & LOOKUP_UNKNOWN_OBJECT) && (type <0))1973 type =0;1974else if(type <0)1975die("invalid object type");1976if(oi->typep)1977*oi->typep = type;19781979/*1980 * The length must follow immediately, and be in canonical1981 * decimal format (ie "010" is not valid).1982 */1983 size = *hdr++ -'0';1984if(size >9)1985return-1;1986if(size) {1987for(;;) {1988unsigned long c = *hdr -'0';1989if(c >9)1990break;1991 hdr++;1992 size = size *10+ c;1993}1994}19951996if(oi->sizep)1997*oi->sizep = size;19981999/*2000 * The length must be followed by a zero byte2001 */2002return*hdr ? -1: type;2003}20042005intparse_sha1_header(const char*hdr,unsigned long*sizep)2006{2007struct object_info oi = OBJECT_INFO_INIT;20082009 oi.sizep = sizep;2010returnparse_sha1_header_extended(hdr, &oi, LOOKUP_REPLACE_OBJECT);2011}20122013static void*unpack_sha1_file(void*map,unsigned long mapsize,enum object_type *type,unsigned long*size,const unsigned char*sha1)2014{2015int ret;2016 git_zstream stream;2017char hdr[8192];20182019 ret =unpack_sha1_header(&stream, map, mapsize, hdr,sizeof(hdr));2020if(ret < Z_OK || (*type =parse_sha1_header(hdr, size)) <0)2021return NULL;20222023returnunpack_sha1_rest(&stream, hdr, *size, sha1);2024}20252026unsigned longget_size_from_delta(struct packed_git *p,2027struct pack_window **w_curs,2028 off_t curpos)2029{2030const unsigned char*data;2031unsigned char delta_head[20], *in;2032 git_zstream stream;2033int st;20342035memset(&stream,0,sizeof(stream));2036 stream.next_out = delta_head;2037 stream.avail_out =sizeof(delta_head);20382039git_inflate_init(&stream);2040do{2041 in =use_pack(p, w_curs, curpos, &stream.avail_in);2042 stream.next_in = in;2043 st =git_inflate(&stream, Z_FINISH);2044 curpos += stream.next_in - in;2045}while((st == Z_OK || st == Z_BUF_ERROR) &&2046 stream.total_out <sizeof(delta_head));2047git_inflate_end(&stream);2048if((st != Z_STREAM_END) && stream.total_out !=sizeof(delta_head)) {2049error("delta data unpack-initial failed");2050return0;2051}20522053/* Examine the initial part of the delta to figure out2054 * the result size.2055 */2056 data = delta_head;20572058/* ignore base size */2059get_delta_hdr_size(&data, delta_head+sizeof(delta_head));20602061/* Read the result size */2062returnget_delta_hdr_size(&data, delta_head+sizeof(delta_head));2063}20642065static off_t get_delta_base(struct packed_git *p,2066struct pack_window **w_curs,2067 off_t *curpos,2068enum object_type type,2069 off_t delta_obj_offset)2070{2071unsigned char*base_info =use_pack(p, w_curs, *curpos, NULL);2072 off_t base_offset;20732074/* use_pack() assured us we have [base_info, base_info + 20)2075 * as a range that we can look at without walking off the2076 * end of the mapped window. Its actually the hash size2077 * that is assured. An OFS_DELTA longer than the hash size2078 * is stupid, as then a REF_DELTA would be smaller to store.2079 */2080if(type == OBJ_OFS_DELTA) {2081unsigned used =0;2082unsigned char c = base_info[used++];2083 base_offset = c &127;2084while(c &128) {2085 base_offset +=1;2086if(!base_offset ||MSB(base_offset,7))2087return0;/* overflow */2088 c = base_info[used++];2089 base_offset = (base_offset <<7) + (c &127);2090}2091 base_offset = delta_obj_offset - base_offset;2092if(base_offset <=0|| base_offset >= delta_obj_offset)2093return0;/* out of bound */2094*curpos += used;2095}else if(type == OBJ_REF_DELTA) {2096/* The base entry _must_ be in the same pack */2097 base_offset =find_pack_entry_one(base_info, p);2098*curpos +=20;2099}else2100die("I am totally screwed");2101return base_offset;2102}21032104/*2105 * Like get_delta_base above, but we return the sha1 instead of the pack2106 * offset. This means it is cheaper for REF deltas (we do not have to do2107 * the final object lookup), but more expensive for OFS deltas (we2108 * have to load the revidx to convert the offset back into a sha1).2109 */2110static const unsigned char*get_delta_base_sha1(struct packed_git *p,2111struct pack_window **w_curs,2112 off_t curpos,2113enum object_type type,2114 off_t delta_obj_offset)2115{2116if(type == OBJ_REF_DELTA) {2117unsigned char*base =use_pack(p, w_curs, curpos, NULL);2118return base;2119}else if(type == OBJ_OFS_DELTA) {2120struct revindex_entry *revidx;2121 off_t base_offset =get_delta_base(p, w_curs, &curpos,2122 type, delta_obj_offset);21232124if(!base_offset)2125return NULL;21262127 revidx =find_pack_revindex(p, base_offset);2128if(!revidx)2129return NULL;21302131returnnth_packed_object_sha1(p, revidx->nr);2132}else2133return NULL;2134}21352136intunpack_object_header(struct packed_git *p,2137struct pack_window **w_curs,2138 off_t *curpos,2139unsigned long*sizep)2140{2141unsigned char*base;2142unsigned long left;2143unsigned long used;2144enum object_type type;21452146/* use_pack() assures us we have [base, base + 20) available2147 * as a range that we can look at. (Its actually the hash2148 * size that is assured.) With our object header encoding2149 * the maximum deflated object size is 2^137, which is just2150 * insane, so we know won't exceed what we have been given.2151 */2152 base =use_pack(p, w_curs, *curpos, &left);2153 used =unpack_object_header_buffer(base, left, &type, sizep);2154if(!used) {2155 type = OBJ_BAD;2156}else2157*curpos += used;21582159return type;2160}21612162static intretry_bad_packed_offset(struct packed_git *p, off_t obj_offset)2163{2164int type;2165struct revindex_entry *revidx;2166const unsigned char*sha1;2167 revidx =find_pack_revindex(p, obj_offset);2168if(!revidx)2169return OBJ_BAD;2170 sha1 =nth_packed_object_sha1(p, revidx->nr);2171mark_bad_packed_object(p, sha1);2172 type =sha1_object_info(sha1, NULL);2173if(type <= OBJ_NONE)2174return OBJ_BAD;2175return type;2176}21772178#define POI_STACK_PREALLOC 6421792180static enum object_type packed_to_object_type(struct packed_git *p,2181 off_t obj_offset,2182enum object_type type,2183struct pack_window **w_curs,2184 off_t curpos)2185{2186 off_t small_poi_stack[POI_STACK_PREALLOC];2187 off_t *poi_stack = small_poi_stack;2188int poi_stack_nr =0, poi_stack_alloc = POI_STACK_PREALLOC;21892190while(type == OBJ_OFS_DELTA || type == OBJ_REF_DELTA) {2191 off_t base_offset;2192unsigned long size;2193/* Push the object we're going to leave behind */2194if(poi_stack_nr >= poi_stack_alloc && poi_stack == small_poi_stack) {2195 poi_stack_alloc =alloc_nr(poi_stack_nr);2196ALLOC_ARRAY(poi_stack, poi_stack_alloc);2197memcpy(poi_stack, small_poi_stack,sizeof(off_t)*poi_stack_nr);2198}else{2199ALLOC_GROW(poi_stack, poi_stack_nr+1, poi_stack_alloc);2200}2201 poi_stack[poi_stack_nr++] = obj_offset;2202/* If parsing the base offset fails, just unwind */2203 base_offset =get_delta_base(p, w_curs, &curpos, type, obj_offset);2204if(!base_offset)2205goto unwind;2206 curpos = obj_offset = base_offset;2207 type =unpack_object_header(p, w_curs, &curpos, &size);2208if(type <= OBJ_NONE) {2209/* If getting the base itself fails, we first2210 * retry the base, otherwise unwind */2211 type =retry_bad_packed_offset(p, base_offset);2212if(type > OBJ_NONE)2213goto out;2214goto unwind;2215}2216}22172218switch(type) {2219case OBJ_BAD:2220case OBJ_COMMIT:2221case OBJ_TREE:2222case OBJ_BLOB:2223case OBJ_TAG:2224break;2225default:2226error("unknown object type%iat offset %"PRIuMAX" in%s",2227 type, (uintmax_t)obj_offset, p->pack_name);2228 type = OBJ_BAD;2229}22302231out:2232if(poi_stack != small_poi_stack)2233free(poi_stack);2234return type;22352236unwind:2237while(poi_stack_nr) {2238 obj_offset = poi_stack[--poi_stack_nr];2239 type =retry_bad_packed_offset(p, obj_offset);2240if(type > OBJ_NONE)2241goto out;2242}2243 type = OBJ_BAD;2244goto out;2245}22462247intpacked_object_info(struct packed_git *p, off_t obj_offset,2248struct object_info *oi)2249{2250struct pack_window *w_curs = NULL;2251unsigned long size;2252 off_t curpos = obj_offset;2253enum object_type type;22542255/*2256 * We always get the representation type, but only convert it to2257 * a "real" type later if the caller is interested.2258 */2259 type =unpack_object_header(p, &w_curs, &curpos, &size);22602261if(oi->sizep) {2262if(type == OBJ_OFS_DELTA || type == OBJ_REF_DELTA) {2263 off_t tmp_pos = curpos;2264 off_t base_offset =get_delta_base(p, &w_curs, &tmp_pos,2265 type, obj_offset);2266if(!base_offset) {2267 type = OBJ_BAD;2268goto out;2269}2270*oi->sizep =get_size_from_delta(p, &w_curs, tmp_pos);2271if(*oi->sizep ==0) {2272 type = OBJ_BAD;2273goto out;2274}2275}else{2276*oi->sizep = size;2277}2278}22792280if(oi->disk_sizep) {2281struct revindex_entry *revidx =find_pack_revindex(p, obj_offset);2282*oi->disk_sizep = revidx[1].offset - obj_offset;2283}22842285if(oi->typep) {2286*oi->typep =packed_to_object_type(p, obj_offset, type, &w_curs, curpos);2287if(*oi->typep <0) {2288 type = OBJ_BAD;2289goto out;2290}2291}22922293if(oi->delta_base_sha1) {2294if(type == OBJ_OFS_DELTA || type == OBJ_REF_DELTA) {2295const unsigned char*base;22962297 base =get_delta_base_sha1(p, &w_curs, curpos,2298 type, obj_offset);2299if(!base) {2300 type = OBJ_BAD;2301goto out;2302}23032304hashcpy(oi->delta_base_sha1, base);2305}else2306hashclr(oi->delta_base_sha1);2307}23082309out:2310unuse_pack(&w_curs);2311return type;2312}23132314static void*unpack_compressed_entry(struct packed_git *p,2315struct pack_window **w_curs,2316 off_t curpos,2317unsigned long size)2318{2319int st;2320 git_zstream stream;2321unsigned char*buffer, *in;23222323 buffer =xmallocz_gently(size);2324if(!buffer)2325return NULL;2326memset(&stream,0,sizeof(stream));2327 stream.next_out = buffer;2328 stream.avail_out = size +1;23292330git_inflate_init(&stream);2331do{2332 in =use_pack(p, w_curs, curpos, &stream.avail_in);2333 stream.next_in = in;2334 st =git_inflate(&stream, Z_FINISH);2335if(!stream.avail_out)2336break;/* the payload is larger than it should be */2337 curpos += stream.next_in - in;2338}while(st == Z_OK || st == Z_BUF_ERROR);2339git_inflate_end(&stream);2340if((st != Z_STREAM_END) || stream.total_out != size) {2341free(buffer);2342return NULL;2343}23442345return buffer;2346}23472348static struct hashmap delta_base_cache;2349static size_t delta_base_cached;23502351staticLIST_HEAD(delta_base_cache_lru);23522353struct delta_base_cache_key {2354struct packed_git *p;2355 off_t base_offset;2356};23572358struct delta_base_cache_entry {2359struct hashmap hash;2360struct delta_base_cache_key key;2361struct list_head lru;2362void*data;2363unsigned long size;2364enum object_type type;2365};23662367static unsigned intpack_entry_hash(struct packed_git *p, off_t base_offset)2368{2369unsigned int hash;23702371 hash = (unsigned int)(intptr_t)p + (unsigned int)base_offset;2372 hash += (hash >>8) + (hash >>16);2373return hash;2374}23752376static struct delta_base_cache_entry *2377get_delta_base_cache_entry(struct packed_git *p, off_t base_offset)2378{2379struct hashmap_entry entry;2380struct delta_base_cache_key key;23812382if(!delta_base_cache.cmpfn)2383return NULL;23842385hashmap_entry_init(&entry,pack_entry_hash(p, base_offset));2386 key.p = p;2387 key.base_offset = base_offset;2388returnhashmap_get(&delta_base_cache, &entry, &key);2389}23902391static intdelta_base_cache_key_eq(const struct delta_base_cache_key *a,2392const struct delta_base_cache_key *b)2393{2394return a->p == b->p && a->base_offset == b->base_offset;2395}23962397static intdelta_base_cache_hash_cmp(const void*va,const void*vb,2398const void*vkey)2399{2400const struct delta_base_cache_entry *a = va, *b = vb;2401const struct delta_base_cache_key *key = vkey;2402if(key)2403return!delta_base_cache_key_eq(&a->key, key);2404else2405return!delta_base_cache_key_eq(&a->key, &b->key);2406}24072408static intin_delta_base_cache(struct packed_git *p, off_t base_offset)2409{2410return!!get_delta_base_cache_entry(p, base_offset);2411}24122413/*2414 * Remove the entry from the cache, but do _not_ free the associated2415 * entry data. The caller takes ownership of the "data" buffer, and2416 * should copy out any fields it wants before detaching.2417 */2418static voiddetach_delta_base_cache_entry(struct delta_base_cache_entry *ent)2419{2420hashmap_remove(&delta_base_cache, ent, &ent->key);2421list_del(&ent->lru);2422 delta_base_cached -= ent->size;2423free(ent);2424}24252426static void*cache_or_unpack_entry(struct packed_git *p, off_t base_offset,2427unsigned long*base_size,enum object_type *type)2428{2429struct delta_base_cache_entry *ent;24302431 ent =get_delta_base_cache_entry(p, base_offset);2432if(!ent)2433returnunpack_entry(p, base_offset, type, base_size);24342435*type = ent->type;2436*base_size = ent->size;2437returnxmemdupz(ent->data, ent->size);2438}24392440staticinlinevoidrelease_delta_base_cache(struct delta_base_cache_entry *ent)2441{2442free(ent->data);2443detach_delta_base_cache_entry(ent);2444}24452446voidclear_delta_base_cache(void)2447{2448struct list_head *lru, *tmp;2449list_for_each_safe(lru, tmp, &delta_base_cache_lru) {2450struct delta_base_cache_entry *entry =2451list_entry(lru,struct delta_base_cache_entry, lru);2452release_delta_base_cache(entry);2453}2454}24552456static voidadd_delta_base_cache(struct packed_git *p, off_t base_offset,2457void*base,unsigned long base_size,enum object_type type)2458{2459struct delta_base_cache_entry *ent =xmalloc(sizeof(*ent));2460struct list_head *lru, *tmp;24612462 delta_base_cached += base_size;24632464list_for_each_safe(lru, tmp, &delta_base_cache_lru) {2465struct delta_base_cache_entry *f =2466list_entry(lru,struct delta_base_cache_entry, lru);2467if(delta_base_cached <= delta_base_cache_limit)2468break;2469release_delta_base_cache(f);2470}24712472 ent->key.p = p;2473 ent->key.base_offset = base_offset;2474 ent->type = type;2475 ent->data = base;2476 ent->size = base_size;2477list_add_tail(&ent->lru, &delta_base_cache_lru);24782479if(!delta_base_cache.cmpfn)2480hashmap_init(&delta_base_cache, delta_base_cache_hash_cmp,0);2481hashmap_entry_init(ent,pack_entry_hash(p, base_offset));2482hashmap_add(&delta_base_cache, ent);2483}24842485static void*read_object(const unsigned char*sha1,enum object_type *type,2486unsigned long*size);24872488static voidwrite_pack_access_log(struct packed_git *p, off_t obj_offset)2489{2490static struct trace_key pack_access =TRACE_KEY_INIT(PACK_ACCESS);2491trace_printf_key(&pack_access,"%s%"PRIuMAX"\n",2492 p->pack_name, (uintmax_t)obj_offset);2493}24942495int do_check_packed_object_crc;24962497#define UNPACK_ENTRY_STACK_PREALLOC 642498struct unpack_entry_stack_ent {2499 off_t obj_offset;2500 off_t curpos;2501unsigned long size;2502};25032504void*unpack_entry(struct packed_git *p, off_t obj_offset,2505enum object_type *final_type,unsigned long*final_size)2506{2507struct pack_window *w_curs = NULL;2508 off_t curpos = obj_offset;2509void*data = NULL;2510unsigned long size;2511enum object_type type;2512struct unpack_entry_stack_ent small_delta_stack[UNPACK_ENTRY_STACK_PREALLOC];2513struct unpack_entry_stack_ent *delta_stack = small_delta_stack;2514int delta_stack_nr =0, delta_stack_alloc = UNPACK_ENTRY_STACK_PREALLOC;2515int base_from_cache =0;25162517write_pack_access_log(p, obj_offset);25182519/* PHASE 1: drill down to the innermost base object */2520for(;;) {2521 off_t base_offset;2522int i;2523struct delta_base_cache_entry *ent;25242525 ent =get_delta_base_cache_entry(p, curpos);2526if(ent) {2527 type = ent->type;2528 data = ent->data;2529 size = ent->size;2530detach_delta_base_cache_entry(ent);2531 base_from_cache =1;2532break;2533}25342535if(do_check_packed_object_crc && p->index_version >1) {2536struct revindex_entry *revidx =find_pack_revindex(p, obj_offset);2537 off_t len = revidx[1].offset - obj_offset;2538if(check_pack_crc(p, &w_curs, obj_offset, len, revidx->nr)) {2539const unsigned char*sha1 =2540nth_packed_object_sha1(p, revidx->nr);2541error("bad packed object CRC for%s",2542sha1_to_hex(sha1));2543mark_bad_packed_object(p, sha1);2544unuse_pack(&w_curs);2545return NULL;2546}2547}25482549 type =unpack_object_header(p, &w_curs, &curpos, &size);2550if(type != OBJ_OFS_DELTA && type != OBJ_REF_DELTA)2551break;25522553 base_offset =get_delta_base(p, &w_curs, &curpos, type, obj_offset);2554if(!base_offset) {2555error("failed to validate delta base reference "2556"at offset %"PRIuMAX" from%s",2557(uintmax_t)curpos, p->pack_name);2558/* bail to phase 2, in hopes of recovery */2559 data = NULL;2560break;2561}25622563/* push object, proceed to base */2564if(delta_stack_nr >= delta_stack_alloc2565&& delta_stack == small_delta_stack) {2566 delta_stack_alloc =alloc_nr(delta_stack_nr);2567ALLOC_ARRAY(delta_stack, delta_stack_alloc);2568memcpy(delta_stack, small_delta_stack,2569sizeof(*delta_stack)*delta_stack_nr);2570}else{2571ALLOC_GROW(delta_stack, delta_stack_nr+1, delta_stack_alloc);2572}2573 i = delta_stack_nr++;2574 delta_stack[i].obj_offset = obj_offset;2575 delta_stack[i].curpos = curpos;2576 delta_stack[i].size = size;25772578 curpos = obj_offset = base_offset;2579}25802581/* PHASE 2: handle the base */2582switch(type) {2583case OBJ_OFS_DELTA:2584case OBJ_REF_DELTA:2585if(data)2586die("BUG: unpack_entry: left loop at a valid delta");2587break;2588case OBJ_COMMIT:2589case OBJ_TREE:2590case OBJ_BLOB:2591case OBJ_TAG:2592if(!base_from_cache)2593 data =unpack_compressed_entry(p, &w_curs, curpos, size);2594break;2595default:2596 data = NULL;2597error("unknown object type%iat offset %"PRIuMAX" in%s",2598 type, (uintmax_t)obj_offset, p->pack_name);2599}26002601/* PHASE 3: apply deltas in order */26022603/* invariants:2604 * 'data' holds the base data, or NULL if there was corruption2605 */2606while(delta_stack_nr) {2607void*delta_data;2608void*base = data;2609void*external_base = NULL;2610unsigned long delta_size, base_size = size;2611int i;26122613 data = NULL;26142615if(base)2616add_delta_base_cache(p, obj_offset, base, base_size, type);26172618if(!base) {2619/*2620 * We're probably in deep shit, but let's try to fetch2621 * the required base anyway from another pack or loose.2622 * This is costly but should happen only in the presence2623 * of a corrupted pack, and is better than failing outright.2624 */2625struct revindex_entry *revidx;2626const unsigned char*base_sha1;2627 revidx =find_pack_revindex(p, obj_offset);2628if(revidx) {2629 base_sha1 =nth_packed_object_sha1(p, revidx->nr);2630error("failed to read delta base object%s"2631" at offset %"PRIuMAX" from%s",2632sha1_to_hex(base_sha1), (uintmax_t)obj_offset,2633 p->pack_name);2634mark_bad_packed_object(p, base_sha1);2635 base =read_object(base_sha1, &type, &base_size);2636 external_base = base;2637}2638}26392640 i = --delta_stack_nr;2641 obj_offset = delta_stack[i].obj_offset;2642 curpos = delta_stack[i].curpos;2643 delta_size = delta_stack[i].size;26442645if(!base)2646continue;26472648 delta_data =unpack_compressed_entry(p, &w_curs, curpos, delta_size);26492650if(!delta_data) {2651error("failed to unpack compressed delta "2652"at offset %"PRIuMAX" from%s",2653(uintmax_t)curpos, p->pack_name);2654 data = NULL;2655free(external_base);2656continue;2657}26582659 data =patch_delta(base, base_size,2660 delta_data, delta_size,2661&size);26622663/*2664 * We could not apply the delta; warn the user, but keep going.2665 * Our failure will be noticed either in the next iteration of2666 * the loop, or if this is the final delta, in the caller when2667 * we return NULL. Those code paths will take care of making2668 * a more explicit warning and retrying with another copy of2669 * the object.2670 */2671if(!data)2672error("failed to apply delta");26732674free(delta_data);2675free(external_base);2676}26772678*final_type = type;2679*final_size = size;26802681unuse_pack(&w_curs);26822683if(delta_stack != small_delta_stack)2684free(delta_stack);26852686return data;2687}26882689const unsigned char*nth_packed_object_sha1(struct packed_git *p,2690uint32_t n)2691{2692const unsigned char*index = p->index_data;2693if(!index) {2694if(open_pack_index(p))2695return NULL;2696 index = p->index_data;2697}2698if(n >= p->num_objects)2699return NULL;2700 index +=4*256;2701if(p->index_version ==1) {2702return index +24* n +4;2703}else{2704 index +=8;2705return index +20* n;2706}2707}27082709const struct object_id *nth_packed_object_oid(struct object_id *oid,2710struct packed_git *p,2711uint32_t n)2712{2713const unsigned char*hash =nth_packed_object_sha1(p, n);2714if(!hash)2715return NULL;2716hashcpy(oid->hash, hash);2717return oid;2718}27192720voidcheck_pack_index_ptr(const struct packed_git *p,const void*vptr)2721{2722const unsigned char*ptr = vptr;2723const unsigned char*start = p->index_data;2724const unsigned char*end = start + p->index_size;2725if(ptr < start)2726die(_("offset before start of pack index for%s(corrupt index?)"),2727 p->pack_name);2728/* No need to check for underflow; .idx files must be at least 8 bytes */2729if(ptr >= end -8)2730die(_("offset beyond end of pack index for%s(truncated index?)"),2731 p->pack_name);2732}27332734off_t nth_packed_object_offset(const struct packed_git *p,uint32_t n)2735{2736const unsigned char*index = p->index_data;2737 index +=4*256;2738if(p->index_version ==1) {2739returnntohl(*((uint32_t*)(index +24* n)));2740}else{2741uint32_t off;2742 index +=8+ p->num_objects * (20+4);2743 off =ntohl(*((uint32_t*)(index +4* n)));2744if(!(off &0x80000000))2745return off;2746 index += p->num_objects *4+ (off &0x7fffffff) *8;2747check_pack_index_ptr(p, index);2748return(((uint64_t)ntohl(*((uint32_t*)(index +0)))) <<32) |2749ntohl(*((uint32_t*)(index +4)));2750}2751}27522753off_t find_pack_entry_one(const unsigned char*sha1,2754struct packed_git *p)2755{2756const uint32_t*level1_ofs = p->index_data;2757const unsigned char*index = p->index_data;2758unsigned hi, lo, stride;2759static int use_lookup = -1;2760static int debug_lookup = -1;27612762if(debug_lookup <0)2763 debug_lookup = !!getenv("GIT_DEBUG_LOOKUP");27642765if(!index) {2766if(open_pack_index(p))2767return0;2768 level1_ofs = p->index_data;2769 index = p->index_data;2770}2771if(p->index_version >1) {2772 level1_ofs +=2;2773 index +=8;2774}2775 index +=4*256;2776 hi =ntohl(level1_ofs[*sha1]);2777 lo = ((*sha1 ==0x0) ?0:ntohl(level1_ofs[*sha1 -1]));2778if(p->index_version >1) {2779 stride =20;2780}else{2781 stride =24;2782 index +=4;2783}27842785if(debug_lookup)2786printf("%02x%02x%02x... lo%uhi%unr %"PRIu32"\n",2787 sha1[0], sha1[1], sha1[2], lo, hi, p->num_objects);27882789if(use_lookup <0)2790 use_lookup = !!getenv("GIT_USE_LOOKUP");2791if(use_lookup) {2792int pos =sha1_entry_pos(index, stride,0,2793 lo, hi, p->num_objects, sha1);2794if(pos <0)2795return0;2796returnnth_packed_object_offset(p, pos);2797}27982799do{2800unsigned mi = (lo + hi) /2;2801int cmp =hashcmp(index + mi * stride, sha1);28022803if(debug_lookup)2804printf("lo%uhi%urg%umi%u\n",2805 lo, hi, hi - lo, mi);2806if(!cmp)2807returnnth_packed_object_offset(p, mi);2808if(cmp >0)2809 hi = mi;2810else2811 lo = mi+1;2812}while(lo < hi);2813return0;2814}28152816intis_pack_valid(struct packed_git *p)2817{2818/* An already open pack is known to be valid. */2819if(p->pack_fd != -1)2820return1;28212822/* If the pack has one window completely covering the2823 * file size, the pack is known to be valid even if2824 * the descriptor is not currently open.2825 */2826if(p->windows) {2827struct pack_window *w = p->windows;28282829if(!w->offset && w->len == p->pack_size)2830return1;2831}28322833/* Force the pack to open to prove its valid. */2834return!open_packed_git(p);2835}28362837static intfill_pack_entry(const unsigned char*sha1,2838struct pack_entry *e,2839struct packed_git *p)2840{2841 off_t offset;28422843if(p->num_bad_objects) {2844unsigned i;2845for(i =0; i < p->num_bad_objects; i++)2846if(!hashcmp(sha1, p->bad_object_sha1 +20* i))2847return0;2848}28492850 offset =find_pack_entry_one(sha1, p);2851if(!offset)2852return0;28532854/*2855 * We are about to tell the caller where they can locate the2856 * requested object. We better make sure the packfile is2857 * still here and can be accessed before supplying that2858 * answer, as it may have been deleted since the index was2859 * loaded!2860 */2861if(!is_pack_valid(p))2862return0;2863 e->offset = offset;2864 e->p = p;2865hashcpy(e->sha1, sha1);2866return1;2867}28682869/*2870 * Iff a pack file contains the object named by sha1, return true and2871 * store its location to e.2872 */2873static intfind_pack_entry(const unsigned char*sha1,struct pack_entry *e)2874{2875struct mru_entry *p;28762877prepare_packed_git();2878if(!packed_git)2879return0;28802881for(p = packed_git_mru->head; p; p = p->next) {2882if(fill_pack_entry(sha1, e, p->item)) {2883mru_mark(packed_git_mru, p);2884return1;2885}2886}2887return0;2888}28892890struct packed_git *find_sha1_pack(const unsigned char*sha1,2891struct packed_git *packs)2892{2893struct packed_git *p;28942895for(p = packs; p; p = p->next) {2896if(find_pack_entry_one(sha1, p))2897return p;2898}2899return NULL;29002901}29022903static intsha1_loose_object_info(const unsigned char*sha1,2904struct object_info *oi,2905int flags)2906{2907int status =0;2908unsigned long mapsize;2909void*map;2910 git_zstream stream;2911char hdr[32];2912struct strbuf hdrbuf = STRBUF_INIT;29132914if(oi->delta_base_sha1)2915hashclr(oi->delta_base_sha1);29162917/*2918 * If we don't care about type or size, then we don't2919 * need to look inside the object at all. Note that we2920 * do not optimize out the stat call, even if the2921 * caller doesn't care about the disk-size, since our2922 * return value implicitly indicates whether the2923 * object even exists.2924 */2925if(!oi->typep && !oi->typename && !oi->sizep) {2926const char*path;2927struct stat st;2928if(stat_sha1_file(sha1, &st, &path) <0)2929return-1;2930if(oi->disk_sizep)2931*oi->disk_sizep = st.st_size;2932return0;2933}29342935 map =map_sha1_file(sha1, &mapsize);2936if(!map)2937return-1;2938if(oi->disk_sizep)2939*oi->disk_sizep = mapsize;2940if((flags & LOOKUP_UNKNOWN_OBJECT)) {2941if(unpack_sha1_header_to_strbuf(&stream, map, mapsize, hdr,sizeof(hdr), &hdrbuf) <0)2942 status =error("unable to unpack%sheader with --allow-unknown-type",2943sha1_to_hex(sha1));2944}else if(unpack_sha1_header(&stream, map, mapsize, hdr,sizeof(hdr)) <0)2945 status =error("unable to unpack%sheader",2946sha1_to_hex(sha1));2947if(status <0)2948;/* Do nothing */2949else if(hdrbuf.len) {2950if((status =parse_sha1_header_extended(hdrbuf.buf, oi, flags)) <0)2951 status =error("unable to parse%sheader with --allow-unknown-type",2952sha1_to_hex(sha1));2953}else if((status =parse_sha1_header_extended(hdr, oi, flags)) <0)2954 status =error("unable to parse%sheader",sha1_to_hex(sha1));2955git_inflate_end(&stream);2956munmap(map, mapsize);2957if(status && oi->typep)2958*oi->typep = status;2959strbuf_release(&hdrbuf);2960return0;2961}29622963intsha1_object_info_extended(const unsigned char*sha1,struct object_info *oi,unsigned flags)2964{2965struct cached_object *co;2966struct pack_entry e;2967int rtype;2968enum object_type real_type;2969const unsigned char*real =lookup_replace_object_extended(sha1, flags);29702971 co =find_cached_object(real);2972if(co) {2973if(oi->typep)2974*(oi->typep) = co->type;2975if(oi->sizep)2976*(oi->sizep) = co->size;2977if(oi->disk_sizep)2978*(oi->disk_sizep) =0;2979if(oi->delta_base_sha1)2980hashclr(oi->delta_base_sha1);2981if(oi->typename)2982strbuf_addstr(oi->typename,typename(co->type));2983 oi->whence = OI_CACHED;2984return0;2985}29862987if(!find_pack_entry(real, &e)) {2988/* Most likely it's a loose object. */2989if(!sha1_loose_object_info(real, oi, flags)) {2990 oi->whence = OI_LOOSE;2991return0;2992}29932994/* Not a loose object; someone else may have just packed it. */2995reprepare_packed_git();2996if(!find_pack_entry(real, &e))2997return-1;2998}29993000/*3001 * packed_object_info() does not follow the delta chain to3002 * find out the real type, unless it is given oi->typep.3003 */3004if(oi->typename && !oi->typep)3005 oi->typep = &real_type;30063007 rtype =packed_object_info(e.p, e.offset, oi);3008if(rtype <0) {3009mark_bad_packed_object(e.p, real);3010if(oi->typep == &real_type)3011 oi->typep = NULL;3012returnsha1_object_info_extended(real, oi,0);3013}else if(in_delta_base_cache(e.p, e.offset)) {3014 oi->whence = OI_DBCACHED;3015}else{3016 oi->whence = OI_PACKED;3017 oi->u.packed.offset = e.offset;3018 oi->u.packed.pack = e.p;3019 oi->u.packed.is_delta = (rtype == OBJ_REF_DELTA ||3020 rtype == OBJ_OFS_DELTA);3021}3022if(oi->typename)3023strbuf_addstr(oi->typename,typename(*oi->typep));3024if(oi->typep == &real_type)3025 oi->typep = NULL;30263027return0;3028}30293030/* returns enum object_type or negative */3031intsha1_object_info(const unsigned char*sha1,unsigned long*sizep)3032{3033enum object_type type;3034struct object_info oi = OBJECT_INFO_INIT;30353036 oi.typep = &type;3037 oi.sizep = sizep;3038if(sha1_object_info_extended(sha1, &oi, LOOKUP_REPLACE_OBJECT) <0)3039return-1;3040return type;3041}30423043static void*read_packed_sha1(const unsigned char*sha1,3044enum object_type *type,unsigned long*size)3045{3046struct pack_entry e;3047void*data;30483049if(!find_pack_entry(sha1, &e))3050return NULL;3051 data =cache_or_unpack_entry(e.p, e.offset, size, type);3052if(!data) {3053/*3054 * We're probably in deep shit, but let's try to fetch3055 * the required object anyway from another pack or loose.3056 * This should happen only in the presence of a corrupted3057 * pack, and is better than failing outright.3058 */3059error("failed to read object%sat offset %"PRIuMAX" from%s",3060sha1_to_hex(sha1), (uintmax_t)e.offset, e.p->pack_name);3061mark_bad_packed_object(e.p, sha1);3062 data =read_object(sha1, type, size);3063}3064return data;3065}30663067intpretend_sha1_file(void*buf,unsigned long len,enum object_type type,3068unsigned char*sha1)3069{3070struct cached_object *co;30713072hash_sha1_file(buf, len,typename(type), sha1);3073if(has_sha1_file(sha1) ||find_cached_object(sha1))3074return0;3075ALLOC_GROW(cached_objects, cached_object_nr +1, cached_object_alloc);3076 co = &cached_objects[cached_object_nr++];3077 co->size = len;3078 co->type = type;3079 co->buf =xmalloc(len);3080memcpy(co->buf, buf, len);3081hashcpy(co->sha1, sha1);3082return0;3083}30843085static void*read_object(const unsigned char*sha1,enum object_type *type,3086unsigned long*size)3087{3088unsigned long mapsize;3089void*map, *buf;3090struct cached_object *co;30913092 co =find_cached_object(sha1);3093if(co) {3094*type = co->type;3095*size = co->size;3096returnxmemdupz(co->buf, co->size);3097}30983099 buf =read_packed_sha1(sha1, type, size);3100if(buf)3101return buf;3102 map =map_sha1_file(sha1, &mapsize);3103if(map) {3104 buf =unpack_sha1_file(map, mapsize, type, size, sha1);3105munmap(map, mapsize);3106return buf;3107}3108reprepare_packed_git();3109returnread_packed_sha1(sha1, type, size);3110}31113112/*3113 * This function dies on corrupt objects; the callers who want to3114 * deal with them should arrange to call read_object() and give error3115 * messages themselves.3116 */3117void*read_sha1_file_extended(const unsigned char*sha1,3118enum object_type *type,3119unsigned long*size,3120unsigned flag)3121{3122void*data;3123const struct packed_git *p;3124const char*path;3125struct stat st;3126const unsigned char*repl =lookup_replace_object_extended(sha1, flag);31273128 errno =0;3129 data =read_object(repl, type, size);3130if(data)3131return data;31323133if(errno && errno != ENOENT)3134die_errno("failed to read object%s",sha1_to_hex(sha1));31353136/* die if we replaced an object with one that does not exist */3137if(repl != sha1)3138die("replacement%snot found for%s",3139sha1_to_hex(repl),sha1_to_hex(sha1));31403141if(!stat_sha1_file(repl, &st, &path))3142die("loose object%s(stored in%s) is corrupt",3143sha1_to_hex(repl), path);31443145if((p =has_packed_and_bad(repl)) != NULL)3146die("packed object%s(stored in%s) is corrupt",3147sha1_to_hex(repl), p->pack_name);31483149return NULL;3150}31513152void*read_object_with_reference(const unsigned char*sha1,3153const char*required_type_name,3154unsigned long*size,3155unsigned char*actual_sha1_return)3156{3157enum object_type type, required_type;3158void*buffer;3159unsigned long isize;3160unsigned char actual_sha1[20];31613162 required_type =type_from_string(required_type_name);3163hashcpy(actual_sha1, sha1);3164while(1) {3165int ref_length = -1;3166const char*ref_type = NULL;31673168 buffer =read_sha1_file(actual_sha1, &type, &isize);3169if(!buffer)3170return NULL;3171if(type == required_type) {3172*size = isize;3173if(actual_sha1_return)3174hashcpy(actual_sha1_return, actual_sha1);3175return buffer;3176}3177/* Handle references */3178else if(type == OBJ_COMMIT)3179 ref_type ="tree ";3180else if(type == OBJ_TAG)3181 ref_type ="object ";3182else{3183free(buffer);3184return NULL;3185}3186 ref_length =strlen(ref_type);31873188if(ref_length +40> isize ||3189memcmp(buffer, ref_type, ref_length) ||3190get_sha1_hex((char*) buffer + ref_length, actual_sha1)) {3191free(buffer);3192return NULL;3193}3194free(buffer);3195/* Now we have the ID of the referred-to object in3196 * actual_sha1. Check again. */3197}3198}31993200static voidwrite_sha1_file_prepare(const void*buf,unsigned long len,3201const char*type,unsigned char*sha1,3202char*hdr,int*hdrlen)3203{3204 git_SHA_CTX c;32053206/* Generate the header */3207*hdrlen =xsnprintf(hdr, *hdrlen,"%s %lu", type, len)+1;32083209/* Sha1.. */3210git_SHA1_Init(&c);3211git_SHA1_Update(&c, hdr, *hdrlen);3212git_SHA1_Update(&c, buf, len);3213git_SHA1_Final(sha1, &c);3214}32153216/*3217 * Move the just written object into its final resting place.3218 */3219intfinalize_object_file(const char*tmpfile,const char*filename)3220{3221int ret =0;32223223if(object_creation_mode == OBJECT_CREATION_USES_RENAMES)3224goto try_rename;3225else if(link(tmpfile, filename))3226 ret = errno;32273228/*3229 * Coda hack - coda doesn't like cross-directory links,3230 * so we fall back to a rename, which will mean that it3231 * won't be able to check collisions, but that's not a3232 * big deal.3233 *3234 * The same holds for FAT formatted media.3235 *3236 * When this succeeds, we just return. We have nothing3237 * left to unlink.3238 */3239if(ret && ret != EEXIST) {3240 try_rename:3241if(!rename(tmpfile, filename))3242goto out;3243 ret = errno;3244}3245unlink_or_warn(tmpfile);3246if(ret) {3247if(ret != EEXIST) {3248returnerror_errno("unable to write sha1 filename%s", filename);3249}3250/* FIXME!!! Collision check here ? */3251}32523253out:3254if(adjust_shared_perm(filename))3255returnerror("unable to set permission to '%s'", filename);3256return0;3257}32583259static intwrite_buffer(int fd,const void*buf,size_t len)3260{3261if(write_in_full(fd, buf, len) <0)3262returnerror_errno("file write error");3263return0;3264}32653266inthash_sha1_file(const void*buf,unsigned long len,const char*type,3267unsigned char*sha1)3268{3269char hdr[32];3270int hdrlen =sizeof(hdr);3271write_sha1_file_prepare(buf, len, type, sha1, hdr, &hdrlen);3272return0;3273}32743275/* Finalize a file on disk, and close it. */3276static voidclose_sha1_file(int fd)3277{3278if(fsync_object_files)3279fsync_or_die(fd,"sha1 file");3280if(close(fd) !=0)3281die_errno("error when closing sha1 file");3282}32833284/* Size of directory component, including the ending '/' */3285staticinlineintdirectory_size(const char*filename)3286{3287const char*s =strrchr(filename,'/');3288if(!s)3289return0;3290return s - filename +1;3291}32923293/*3294 * This creates a temporary file in the same directory as the final3295 * 'filename'3296 *3297 * We want to avoid cross-directory filename renames, because those3298 * can have problems on various filesystems (FAT, NFS, Coda).3299 */3300static intcreate_tmpfile(struct strbuf *tmp,const char*filename)3301{3302int fd, dirlen =directory_size(filename);33033304strbuf_reset(tmp);3305strbuf_add(tmp, filename, dirlen);3306strbuf_addstr(tmp,"tmp_obj_XXXXXX");3307 fd =git_mkstemp_mode(tmp->buf,0444);3308if(fd <0&& dirlen && errno == ENOENT) {3309/*3310 * Make sure the directory exists; note that the contents3311 * of the buffer are undefined after mkstemp returns an3312 * error, so we have to rewrite the whole buffer from3313 * scratch.3314 */3315strbuf_reset(tmp);3316strbuf_add(tmp, filename, dirlen -1);3317if(mkdir(tmp->buf,0777) && errno != EEXIST)3318return-1;3319if(adjust_shared_perm(tmp->buf))3320return-1;33213322/* Try again */3323strbuf_addstr(tmp,"/tmp_obj_XXXXXX");3324 fd =git_mkstemp_mode(tmp->buf,0444);3325}3326return fd;3327}33283329static intwrite_loose_object(const unsigned char*sha1,char*hdr,int hdrlen,3330const void*buf,unsigned long len,time_t mtime)3331{3332int fd, ret;3333unsigned char compressed[4096];3334 git_zstream stream;3335 git_SHA_CTX c;3336unsigned char parano_sha1[20];3337static struct strbuf tmp_file = STRBUF_INIT;3338const char*filename =sha1_file_name(sha1);33393340 fd =create_tmpfile(&tmp_file, filename);3341if(fd <0) {3342if(errno == EACCES)3343returnerror("insufficient permission for adding an object to repository database%s",get_object_directory());3344else3345returnerror_errno("unable to create temporary file");3346}33473348/* Set it up */3349git_deflate_init(&stream, zlib_compression_level);3350 stream.next_out = compressed;3351 stream.avail_out =sizeof(compressed);3352git_SHA1_Init(&c);33533354/* First header.. */3355 stream.next_in = (unsigned char*)hdr;3356 stream.avail_in = hdrlen;3357while(git_deflate(&stream,0) == Z_OK)3358;/* nothing */3359git_SHA1_Update(&c, hdr, hdrlen);33603361/* Then the data itself.. */3362 stream.next_in = (void*)buf;3363 stream.avail_in = len;3364do{3365unsigned char*in0 = stream.next_in;3366 ret =git_deflate(&stream, Z_FINISH);3367git_SHA1_Update(&c, in0, stream.next_in - in0);3368if(write_buffer(fd, compressed, stream.next_out - compressed) <0)3369die("unable to write sha1 file");3370 stream.next_out = compressed;3371 stream.avail_out =sizeof(compressed);3372}while(ret == Z_OK);33733374if(ret != Z_STREAM_END)3375die("unable to deflate new object%s(%d)",sha1_to_hex(sha1), ret);3376 ret =git_deflate_end_gently(&stream);3377if(ret != Z_OK)3378die("deflateEnd on object%sfailed (%d)",sha1_to_hex(sha1), ret);3379git_SHA1_Final(parano_sha1, &c);3380if(hashcmp(sha1, parano_sha1) !=0)3381die("confused by unstable object source data for%s",sha1_to_hex(sha1));33823383close_sha1_file(fd);33843385if(mtime) {3386struct utimbuf utb;3387 utb.actime = mtime;3388 utb.modtime = mtime;3389if(utime(tmp_file.buf, &utb) <0)3390warning_errno("failed utime() on%s", tmp_file.buf);3391}33923393returnfinalize_object_file(tmp_file.buf, filename);3394}33953396static intfreshen_loose_object(const unsigned char*sha1)3397{3398returncheck_and_freshen(sha1,1);3399}34003401static intfreshen_packed_object(const unsigned char*sha1)3402{3403struct pack_entry e;3404if(!find_pack_entry(sha1, &e))3405return0;3406if(e.p->freshened)3407return1;3408if(!freshen_file(e.p->pack_name))3409return0;3410 e.p->freshened =1;3411return1;3412}34133414intwrite_sha1_file(const void*buf,unsigned long len,const char*type,unsigned char*sha1)3415{3416char hdr[32];3417int hdrlen =sizeof(hdr);34183419/* Normally if we have it in the pack then we do not bother writing3420 * it out into .git/objects/??/?{38} file.3421 */3422write_sha1_file_prepare(buf, len, type, sha1, hdr, &hdrlen);3423if(freshen_packed_object(sha1) ||freshen_loose_object(sha1))3424return0;3425returnwrite_loose_object(sha1, hdr, hdrlen, buf, len,0);3426}34273428inthash_sha1_file_literally(const void*buf,unsigned long len,const char*type,3429unsigned char*sha1,unsigned flags)3430{3431char*header;3432int hdrlen, status =0;34333434/* type string, SP, %lu of the length plus NUL must fit this */3435 hdrlen =strlen(type) +32;3436 header =xmalloc(hdrlen);3437write_sha1_file_prepare(buf, len, type, sha1, header, &hdrlen);34383439if(!(flags & HASH_WRITE_OBJECT))3440goto cleanup;3441if(freshen_packed_object(sha1) ||freshen_loose_object(sha1))3442goto cleanup;3443 status =write_loose_object(sha1, header, hdrlen, buf, len,0);34443445cleanup:3446free(header);3447return status;3448}34493450intforce_object_loose(const unsigned char*sha1,time_t mtime)3451{3452void*buf;3453unsigned long len;3454enum object_type type;3455char hdr[32];3456int hdrlen;3457int ret;34583459if(has_loose_object(sha1))3460return0;3461 buf =read_packed_sha1(sha1, &type, &len);3462if(!buf)3463returnerror("cannot read sha1_file for%s",sha1_to_hex(sha1));3464 hdrlen =xsnprintf(hdr,sizeof(hdr),"%s %lu",typename(type), len) +1;3465 ret =write_loose_object(sha1, hdr, hdrlen, buf, len, mtime);3466free(buf);34673468return ret;3469}34703471inthas_pack_index(const unsigned char*sha1)3472{3473struct stat st;3474if(stat(sha1_pack_index_name(sha1), &st))3475return0;3476return1;3477}34783479inthas_sha1_pack(const unsigned char*sha1)3480{3481struct pack_entry e;3482returnfind_pack_entry(sha1, &e);3483}34843485inthas_sha1_file_with_flags(const unsigned char*sha1,int flags)3486{3487struct pack_entry e;34883489if(find_pack_entry(sha1, &e))3490return1;3491if(has_loose_object(sha1))3492return1;3493if(flags & HAS_SHA1_QUICK)3494return0;3495reprepare_packed_git();3496returnfind_pack_entry(sha1, &e);3497}34983499inthas_object_file(const struct object_id *oid)3500{3501returnhas_sha1_file(oid->hash);3502}35033504inthas_object_file_with_flags(const struct object_id *oid,int flags)3505{3506returnhas_sha1_file_with_flags(oid->hash, flags);3507}35083509static voidcheck_tree(const void*buf,size_t size)3510{3511struct tree_desc desc;3512struct name_entry entry;35133514init_tree_desc(&desc, buf, size);3515while(tree_entry(&desc, &entry))3516/* do nothing3517 * tree_entry() will die() on malformed entries */3518;3519}35203521static voidcheck_commit(const void*buf,size_t size)3522{3523struct commit c;3524memset(&c,0,sizeof(c));3525if(parse_commit_buffer(&c, buf, size))3526die("corrupt commit");3527}35283529static voidcheck_tag(const void*buf,size_t size)3530{3531struct tag t;3532memset(&t,0,sizeof(t));3533if(parse_tag_buffer(&t, buf, size))3534die("corrupt tag");3535}35363537static intindex_mem(unsigned char*sha1,void*buf,size_t size,3538enum object_type type,3539const char*path,unsigned flags)3540{3541int ret, re_allocated =0;3542int write_object = flags & HASH_WRITE_OBJECT;35433544if(!type)3545 type = OBJ_BLOB;35463547/*3548 * Convert blobs to git internal format3549 */3550if((type == OBJ_BLOB) && path) {3551struct strbuf nbuf = STRBUF_INIT;3552if(convert_to_git(path, buf, size, &nbuf,3553 write_object ? safe_crlf : SAFE_CRLF_FALSE)) {3554 buf =strbuf_detach(&nbuf, &size);3555 re_allocated =1;3556}3557}3558if(flags & HASH_FORMAT_CHECK) {3559if(type == OBJ_TREE)3560check_tree(buf, size);3561if(type == OBJ_COMMIT)3562check_commit(buf, size);3563if(type == OBJ_TAG)3564check_tag(buf, size);3565}35663567if(write_object)3568 ret =write_sha1_file(buf, size,typename(type), sha1);3569else3570 ret =hash_sha1_file(buf, size,typename(type), sha1);3571if(re_allocated)3572free(buf);3573return ret;3574}35753576static intindex_stream_convert_blob(unsigned char*sha1,int fd,3577const char*path,unsigned flags)3578{3579int ret;3580const int write_object = flags & HASH_WRITE_OBJECT;3581struct strbuf sbuf = STRBUF_INIT;35823583assert(path);3584assert(would_convert_to_git_filter_fd(path));35853586convert_to_git_filter_fd(path, fd, &sbuf,3587 write_object ? safe_crlf : SAFE_CRLF_FALSE);35883589if(write_object)3590 ret =write_sha1_file(sbuf.buf, sbuf.len,typename(OBJ_BLOB),3591 sha1);3592else3593 ret =hash_sha1_file(sbuf.buf, sbuf.len,typename(OBJ_BLOB),3594 sha1);3595strbuf_release(&sbuf);3596return ret;3597}35983599static intindex_pipe(unsigned char*sha1,int fd,enum object_type type,3600const char*path,unsigned flags)3601{3602struct strbuf sbuf = STRBUF_INIT;3603int ret;36043605if(strbuf_read(&sbuf, fd,4096) >=0)3606 ret =index_mem(sha1, sbuf.buf, sbuf.len, type, path, flags);3607else3608 ret = -1;3609strbuf_release(&sbuf);3610return ret;3611}36123613#define SMALL_FILE_SIZE (32*1024)36143615static intindex_core(unsigned char*sha1,int fd,size_t size,3616enum object_type type,const char*path,3617unsigned flags)3618{3619int ret;36203621if(!size) {3622 ret =index_mem(sha1,"", size, type, path, flags);3623}else if(size <= SMALL_FILE_SIZE) {3624char*buf =xmalloc(size);3625if(size ==read_in_full(fd, buf, size))3626 ret =index_mem(sha1, buf, size, type, path, flags);3627else3628 ret =error_errno("short read");3629free(buf);3630}else{3631void*buf =xmmap(NULL, size, PROT_READ, MAP_PRIVATE, fd,0);3632 ret =index_mem(sha1, buf, size, type, path, flags);3633munmap(buf, size);3634}3635return ret;3636}36373638/*3639 * This creates one packfile per large blob unless bulk-checkin3640 * machinery is "plugged".3641 *3642 * This also bypasses the usual "convert-to-git" dance, and that is on3643 * purpose. We could write a streaming version of the converting3644 * functions and insert that before feeding the data to fast-import3645 * (or equivalent in-core API described above). However, that is3646 * somewhat complicated, as we do not know the size of the filter3647 * result, which we need to know beforehand when writing a git object.3648 * Since the primary motivation for trying to stream from the working3649 * tree file and to avoid mmaping it in core is to deal with large3650 * binary blobs, they generally do not want to get any conversion, and3651 * callers should avoid this code path when filters are requested.3652 */3653static intindex_stream(unsigned char*sha1,int fd,size_t size,3654enum object_type type,const char*path,3655unsigned flags)3656{3657returnindex_bulk_checkin(sha1, fd, size, type, path, flags);3658}36593660intindex_fd(unsigned char*sha1,int fd,struct stat *st,3661enum object_type type,const char*path,unsigned flags)3662{3663int ret;36643665/*3666 * Call xsize_t() only when needed to avoid potentially unnecessary3667 * die() for large files.3668 */3669if(type == OBJ_BLOB && path &&would_convert_to_git_filter_fd(path))3670 ret =index_stream_convert_blob(sha1, fd, path, flags);3671else if(!S_ISREG(st->st_mode))3672 ret =index_pipe(sha1, fd, type, path, flags);3673else if(st->st_size <= big_file_threshold || type != OBJ_BLOB ||3674(path &&would_convert_to_git(path)))3675 ret =index_core(sha1, fd,xsize_t(st->st_size), type, path,3676 flags);3677else3678 ret =index_stream(sha1, fd,xsize_t(st->st_size), type, path,3679 flags);3680close(fd);3681return ret;3682}36833684intindex_path(unsigned char*sha1,const char*path,struct stat *st,unsigned flags)3685{3686int fd;3687struct strbuf sb = STRBUF_INIT;36883689switch(st->st_mode & S_IFMT) {3690case S_IFREG:3691 fd =open(path, O_RDONLY);3692if(fd <0)3693returnerror_errno("open(\"%s\")", path);3694if(index_fd(sha1, fd, st, OBJ_BLOB, path, flags) <0)3695returnerror("%s: failed to insert into database",3696 path);3697break;3698case S_IFLNK:3699if(strbuf_readlink(&sb, path, st->st_size))3700returnerror_errno("readlink(\"%s\")", path);3701if(!(flags & HASH_WRITE_OBJECT))3702hash_sha1_file(sb.buf, sb.len, blob_type, sha1);3703else if(write_sha1_file(sb.buf, sb.len, blob_type, sha1))3704returnerror("%s: failed to insert into database",3705 path);3706strbuf_release(&sb);3707break;3708case S_IFDIR:3709returnresolve_gitlink_ref(path,"HEAD", sha1);3710default:3711returnerror("%s: unsupported file type", path);3712}3713return0;3714}37153716intread_pack_header(int fd,struct pack_header *header)3717{3718if(read_in_full(fd, header,sizeof(*header)) <sizeof(*header))3719/* "eof before pack header was fully read" */3720return PH_ERROR_EOF;37213722if(header->hdr_signature !=htonl(PACK_SIGNATURE))3723/* "protocol error (pack signature mismatch detected)" */3724return PH_ERROR_PACK_SIGNATURE;3725if(!pack_version_ok(header->hdr_version))3726/* "protocol error (pack version unsupported)" */3727return PH_ERROR_PROTOCOL;3728return0;3729}37303731voidassert_sha1_type(const unsigned char*sha1,enum object_type expect)3732{3733enum object_type type =sha1_object_info(sha1, NULL);3734if(type <0)3735die("%sis not a valid object",sha1_to_hex(sha1));3736if(type != expect)3737die("%sis not a valid '%s' object",sha1_to_hex(sha1),3738typename(expect));3739}37403741static intfor_each_file_in_obj_subdir(int subdir_nr,3742struct strbuf *path,3743 each_loose_object_fn obj_cb,3744 each_loose_cruft_fn cruft_cb,3745 each_loose_subdir_fn subdir_cb,3746void*data)3747{3748size_t baselen = path->len;3749DIR*dir =opendir(path->buf);3750struct dirent *de;3751int r =0;37523753if(!dir) {3754if(errno == ENOENT)3755return0;3756returnerror_errno("unable to open%s", path->buf);3757}37583759while((de =readdir(dir))) {3760if(is_dot_or_dotdot(de->d_name))3761continue;37623763strbuf_setlen(path, baselen);3764strbuf_addf(path,"/%s", de->d_name);37653766if(strlen(de->d_name) == GIT_SHA1_HEXSZ -2) {3767char hex[GIT_SHA1_HEXSZ+1];3768struct object_id oid;37693770snprintf(hex,sizeof(hex),"%02x%s",3771 subdir_nr, de->d_name);3772if(!get_oid_hex(hex, &oid)) {3773if(obj_cb) {3774 r =obj_cb(&oid, path->buf, data);3775if(r)3776break;3777}3778continue;3779}3780}37813782if(cruft_cb) {3783 r =cruft_cb(de->d_name, path->buf, data);3784if(r)3785break;3786}3787}3788closedir(dir);37893790strbuf_setlen(path, baselen);3791if(!r && subdir_cb)3792 r =subdir_cb(subdir_nr, path->buf, data);37933794return r;3795}37963797intfor_each_loose_file_in_objdir_buf(struct strbuf *path,3798 each_loose_object_fn obj_cb,3799 each_loose_cruft_fn cruft_cb,3800 each_loose_subdir_fn subdir_cb,3801void*data)3802{3803size_t baselen = path->len;3804int r =0;3805int i;38063807for(i =0; i <256; i++) {3808strbuf_addf(path,"/%02x", i);3809 r =for_each_file_in_obj_subdir(i, path, obj_cb, cruft_cb,3810 subdir_cb, data);3811strbuf_setlen(path, baselen);3812if(r)3813break;3814}38153816return r;3817}38183819intfor_each_loose_file_in_objdir(const char*path,3820 each_loose_object_fn obj_cb,3821 each_loose_cruft_fn cruft_cb,3822 each_loose_subdir_fn subdir_cb,3823void*data)3824{3825struct strbuf buf = STRBUF_INIT;3826int r;38273828strbuf_addstr(&buf, path);3829 r =for_each_loose_file_in_objdir_buf(&buf, obj_cb, cruft_cb,3830 subdir_cb, data);3831strbuf_release(&buf);38323833return r;3834}38353836struct loose_alt_odb_data {3837 each_loose_object_fn *cb;3838void*data;3839};38403841static intloose_from_alt_odb(struct alternate_object_database *alt,3842void*vdata)3843{3844struct loose_alt_odb_data *data = vdata;3845struct strbuf buf = STRBUF_INIT;3846int r;38473848strbuf_addstr(&buf, alt->path);3849 r =for_each_loose_file_in_objdir_buf(&buf,3850 data->cb, NULL, NULL,3851 data->data);3852strbuf_release(&buf);3853return r;3854}38553856intfor_each_loose_object(each_loose_object_fn cb,void*data,unsigned flags)3857{3858struct loose_alt_odb_data alt;3859int r;38603861 r =for_each_loose_file_in_objdir(get_object_directory(),3862 cb, NULL, NULL, data);3863if(r)3864return r;38653866if(flags & FOR_EACH_OBJECT_LOCAL_ONLY)3867return0;38683869 alt.cb = cb;3870 alt.data = data;3871returnforeach_alt_odb(loose_from_alt_odb, &alt);3872}38733874static intfor_each_object_in_pack(struct packed_git *p, each_packed_object_fn cb,void*data)3875{3876uint32_t i;3877int r =0;38783879for(i =0; i < p->num_objects; i++) {3880struct object_id oid;38813882if(!nth_packed_object_oid(&oid, p, i))3883returnerror("unable to get sha1 of object%uin%s",3884 i, p->pack_name);38853886 r =cb(&oid, p, i, data);3887if(r)3888break;3889}3890return r;3891}38923893intfor_each_packed_object(each_packed_object_fn cb,void*data,unsigned flags)3894{3895struct packed_git *p;3896int r =0;3897int pack_errors =0;38983899prepare_packed_git();3900for(p = packed_git; p; p = p->next) {3901if((flags & FOR_EACH_OBJECT_LOCAL_ONLY) && !p->pack_local)3902continue;3903if(open_pack_index(p)) {3904 pack_errors =1;3905continue;3906}3907 r =for_each_object_in_pack(p, cb, data);3908if(r)3909break;3910}3911return r ? r : pack_errors;3912}39133914static intcheck_stream_sha1(git_zstream *stream,3915const char*hdr,3916unsigned long size,3917const char*path,3918const unsigned char*expected_sha1)3919{3920 git_SHA_CTX c;3921unsigned char real_sha1[GIT_SHA1_RAWSZ];3922unsigned char buf[4096];3923unsigned long total_read;3924int status = Z_OK;39253926git_SHA1_Init(&c);3927git_SHA1_Update(&c, hdr, stream->total_out);39283929/*3930 * We already read some bytes into hdr, but the ones up to the NUL3931 * do not count against the object's content size.3932 */3933 total_read = stream->total_out -strlen(hdr) -1;39343935/*3936 * This size comparison must be "<=" to read the final zlib packets;3937 * see the comment in unpack_sha1_rest for details.3938 */3939while(total_read <= size &&3940(status == Z_OK || status == Z_BUF_ERROR)) {3941 stream->next_out = buf;3942 stream->avail_out =sizeof(buf);3943if(size - total_read < stream->avail_out)3944 stream->avail_out = size - total_read;3945 status =git_inflate(stream, Z_FINISH);3946git_SHA1_Update(&c, buf, stream->next_out - buf);3947 total_read += stream->next_out - buf;3948}3949git_inflate_end(stream);39503951if(status != Z_STREAM_END) {3952error("corrupt loose object '%s'",sha1_to_hex(expected_sha1));3953return-1;3954}3955if(stream->avail_in) {3956error("garbage at end of loose object '%s'",3957sha1_to_hex(expected_sha1));3958return-1;3959}39603961git_SHA1_Final(real_sha1, &c);3962if(hashcmp(expected_sha1, real_sha1)) {3963error("sha1 mismatch for%s(expected%s)", path,3964sha1_to_hex(expected_sha1));3965return-1;3966}39673968return0;3969}39703971intread_loose_object(const char*path,3972const unsigned char*expected_sha1,3973enum object_type *type,3974unsigned long*size,3975void**contents)3976{3977int ret = -1;3978int fd = -1;3979void*map = NULL;3980unsigned long mapsize;3981 git_zstream stream;3982char hdr[32];39833984*contents = NULL;39853986 map =map_sha1_file_1(path, NULL, &mapsize);3987if(!map) {3988error_errno("unable to mmap%s", path);3989goto out;3990}39913992if(unpack_sha1_header(&stream, map, mapsize, hdr,sizeof(hdr)) <0) {3993error("unable to unpack header of%s", path);3994goto out;3995}39963997*type =parse_sha1_header(hdr, size);3998if(*type <0) {3999error("unable to parse header of%s", path);4000git_inflate_end(&stream);4001goto out;4002}40034004if(*type == OBJ_BLOB) {4005if(check_stream_sha1(&stream, hdr, *size, path, expected_sha1) <0)4006goto out;4007}else{4008*contents =unpack_sha1_rest(&stream, hdr, *size, expected_sha1);4009if(!*contents) {4010error("unable to unpack contents of%s", path);4011git_inflate_end(&stream);4012goto out;4013}4014if(check_sha1_signature(expected_sha1, *contents,4015*size,typename(*type))) {4016error("sha1 mismatch for%s(expected%s)", path,4017sha1_to_hex(expected_sha1));4018free(*contents);4019goto out;4020}4021}40224023 ret =0;/* everything checks out */40244025out:4026if(map)4027munmap(map, mapsize);4028if(fd >=0)4029close(fd);4030return ret;4031}