1/* 2 * GIT - The information manager from hell 3 * 4 * Copyright (C) Linus Torvalds, 2005 5 * 6 * This handles basic git sha1 object files - packing, unpacking, 7 * creation etc. 8 */ 9#include"cache.h" 10#include"string-list.h" 11#include"lockfile.h" 12#include"delta.h" 13#include"pack.h" 14#include"blob.h" 15#include"commit.h" 16#include"run-command.h" 17#include"tag.h" 18#include"tree.h" 19#include"tree-walk.h" 20#include"refs.h" 21#include"pack-revindex.h" 22#include"sha1-lookup.h" 23#include"bulk-checkin.h" 24#include"streaming.h" 25#include"dir.h" 26#include"mru.h" 27#include"list.h" 28#include"mergesort.h" 29#include"quote.h" 30 31#define SZ_FMT PRIuMAX 32staticinlineuintmax_tsz_fmt(size_t s) {return s; } 33 34const unsigned char null_sha1[20]; 35const struct object_id null_oid; 36const struct object_id empty_tree_oid = { 37 EMPTY_TREE_SHA1_BIN_LITERAL 38}; 39const struct object_id empty_blob_oid = { 40 EMPTY_BLOB_SHA1_BIN_LITERAL 41}; 42 43/* 44 * This is meant to hold a *small* number of objects that you would 45 * want read_sha1_file() to be able to return, but yet you do not want 46 * to write them into the object store (e.g. a browse-only 47 * application). 48 */ 49static struct cached_object { 50unsigned char sha1[20]; 51enum object_type type; 52void*buf; 53unsigned long size; 54} *cached_objects; 55static int cached_object_nr, cached_object_alloc; 56 57static struct cached_object empty_tree = { 58 EMPTY_TREE_SHA1_BIN_LITERAL, 59 OBJ_TREE, 60"", 610 62}; 63 64static struct cached_object *find_cached_object(const unsigned char*sha1) 65{ 66int i; 67struct cached_object *co = cached_objects; 68 69for(i =0; i < cached_object_nr; i++, co++) { 70if(!hashcmp(co->sha1, sha1)) 71return co; 72} 73if(!hashcmp(sha1, empty_tree.sha1)) 74return&empty_tree; 75return NULL; 76} 77 78intmkdir_in_gitdir(const char*path) 79{ 80if(mkdir(path,0777)) { 81int saved_errno = errno; 82struct stat st; 83struct strbuf sb = STRBUF_INIT; 84 85if(errno != EEXIST) 86return-1; 87/* 88 * Are we looking at a path in a symlinked worktree 89 * whose original repository does not yet have it? 90 * e.g. .git/rr-cache pointing at its original 91 * repository in which the user hasn't performed any 92 * conflict resolution yet? 93 */ 94if(lstat(path, &st) || !S_ISLNK(st.st_mode) || 95strbuf_readlink(&sb, path, st.st_size) || 96!is_absolute_path(sb.buf) || 97mkdir(sb.buf,0777)) { 98strbuf_release(&sb); 99 errno = saved_errno; 100return-1; 101} 102strbuf_release(&sb); 103} 104returnadjust_shared_perm(path); 105} 106 107enum scld_error safe_create_leading_directories(char*path) 108{ 109char*next_component = path +offset_1st_component(path); 110enum scld_error ret = SCLD_OK; 111 112while(ret == SCLD_OK && next_component) { 113struct stat st; 114char*slash = next_component, slash_character; 115 116while(*slash && !is_dir_sep(*slash)) 117 slash++; 118 119if(!*slash) 120break; 121 122 next_component = slash +1; 123while(is_dir_sep(*next_component)) 124 next_component++; 125if(!*next_component) 126break; 127 128 slash_character = *slash; 129*slash ='\0'; 130if(!stat(path, &st)) { 131/* path exists */ 132if(!S_ISDIR(st.st_mode)) { 133 errno = ENOTDIR; 134 ret = SCLD_EXISTS; 135} 136}else if(mkdir(path,0777)) { 137if(errno == EEXIST && 138!stat(path, &st) &&S_ISDIR(st.st_mode)) 139;/* somebody created it since we checked */ 140else if(errno == ENOENT) 141/* 142 * Either mkdir() failed because 143 * somebody just pruned the containing 144 * directory, or stat() failed because 145 * the file that was in our way was 146 * just removed. Either way, inform 147 * the caller that it might be worth 148 * trying again: 149 */ 150 ret = SCLD_VANISHED; 151else 152 ret = SCLD_FAILED; 153}else if(adjust_shared_perm(path)) { 154 ret = SCLD_PERMS; 155} 156*slash = slash_character; 157} 158return ret; 159} 160 161enum scld_error safe_create_leading_directories_const(const char*path) 162{ 163int save_errno; 164/* path points to cache entries, so xstrdup before messing with it */ 165char*buf =xstrdup(path); 166enum scld_error result =safe_create_leading_directories(buf); 167 168 save_errno = errno; 169free(buf); 170 errno = save_errno; 171return result; 172} 173 174intraceproof_create_file(const char*path, create_file_fn fn,void*cb) 175{ 176/* 177 * The number of times we will try to remove empty directories 178 * in the way of path. This is only 1 because if another 179 * process is racily creating directories that conflict with 180 * us, we don't want to fight against them. 181 */ 182int remove_directories_remaining =1; 183 184/* 185 * The number of times that we will try to create the 186 * directories containing path. We are willing to attempt this 187 * more than once, because another process could be trying to 188 * clean up empty directories at the same time as we are 189 * trying to create them. 190 */ 191int create_directories_remaining =3; 192 193/* A scratch copy of path, filled lazily if we need it: */ 194struct strbuf path_copy = STRBUF_INIT; 195 196int ret, save_errno; 197 198/* Sanity check: */ 199assert(*path); 200 201retry_fn: 202 ret =fn(path, cb); 203 save_errno = errno; 204if(!ret) 205goto out; 206 207if(errno == EISDIR && remove_directories_remaining-- >0) { 208/* 209 * A directory is in the way. Maybe it is empty; try 210 * to remove it: 211 */ 212if(!path_copy.len) 213strbuf_addstr(&path_copy, path); 214 215if(!remove_dir_recursively(&path_copy, REMOVE_DIR_EMPTY_ONLY)) 216goto retry_fn; 217}else if(errno == ENOENT && create_directories_remaining-- >0) { 218/* 219 * Maybe the containing directory didn't exist, or 220 * maybe it was just deleted by a process that is 221 * racing with us to clean up empty directories. Try 222 * to create it: 223 */ 224enum scld_error scld_result; 225 226if(!path_copy.len) 227strbuf_addstr(&path_copy, path); 228 229do{ 230 scld_result =safe_create_leading_directories(path_copy.buf); 231if(scld_result == SCLD_OK) 232goto retry_fn; 233}while(scld_result == SCLD_VANISHED && create_directories_remaining-- >0); 234} 235 236out: 237strbuf_release(&path_copy); 238 errno = save_errno; 239return ret; 240} 241 242static voidfill_sha1_path(struct strbuf *buf,const unsigned char*sha1) 243{ 244int i; 245for(i =0; i <20; i++) { 246static char hex[] ="0123456789abcdef"; 247unsigned int val = sha1[i]; 248strbuf_addch(buf, hex[val >>4]); 249strbuf_addch(buf, hex[val &0xf]); 250if(!i) 251strbuf_addch(buf,'/'); 252} 253} 254 255const char*sha1_file_name(const unsigned char*sha1) 256{ 257static struct strbuf buf = STRBUF_INIT; 258 259strbuf_reset(&buf); 260strbuf_addf(&buf,"%s/",get_object_directory()); 261 262fill_sha1_path(&buf, sha1); 263return buf.buf; 264} 265 266struct strbuf *alt_scratch_buf(struct alternate_object_database *alt) 267{ 268strbuf_setlen(&alt->scratch, alt->base_len); 269return&alt->scratch; 270} 271 272static const char*alt_sha1_path(struct alternate_object_database *alt, 273const unsigned char*sha1) 274{ 275struct strbuf *buf =alt_scratch_buf(alt); 276fill_sha1_path(buf, sha1); 277return buf->buf; 278} 279 280/* 281 * Return the name of the pack or index file with the specified sha1 282 * in its filename. *base and *name are scratch space that must be 283 * provided by the caller. which should be "pack" or "idx". 284 */ 285static char*sha1_get_pack_name(const unsigned char*sha1, 286struct strbuf *buf, 287const char*which) 288{ 289strbuf_reset(buf); 290strbuf_addf(buf,"%s/pack/pack-%s.%s",get_object_directory(), 291sha1_to_hex(sha1), which); 292return buf->buf; 293} 294 295char*sha1_pack_name(const unsigned char*sha1) 296{ 297static struct strbuf buf = STRBUF_INIT; 298returnsha1_get_pack_name(sha1, &buf,"pack"); 299} 300 301char*sha1_pack_index_name(const unsigned char*sha1) 302{ 303static struct strbuf buf = STRBUF_INIT; 304returnsha1_get_pack_name(sha1, &buf,"idx"); 305} 306 307struct alternate_object_database *alt_odb_list; 308static struct alternate_object_database **alt_odb_tail; 309 310/* 311 * Return non-zero iff the path is usable as an alternate object database. 312 */ 313static intalt_odb_usable(struct strbuf *path,const char*normalized_objdir) 314{ 315struct alternate_object_database *alt; 316 317/* Detect cases where alternate disappeared */ 318if(!is_directory(path->buf)) { 319error("object directory%sdoes not exist; " 320"check .git/objects/info/alternates.", 321 path->buf); 322return0; 323} 324 325/* 326 * Prevent the common mistake of listing the same 327 * thing twice, or object directory itself. 328 */ 329for(alt = alt_odb_list; alt; alt = alt->next) { 330if(!fspathcmp(path->buf, alt->path)) 331return0; 332} 333if(!fspathcmp(path->buf, normalized_objdir)) 334return0; 335 336return1; 337} 338 339/* 340 * Prepare alternate object database registry. 341 * 342 * The variable alt_odb_list points at the list of struct 343 * alternate_object_database. The elements on this list come from 344 * non-empty elements from colon separated ALTERNATE_DB_ENVIRONMENT 345 * environment variable, and $GIT_OBJECT_DIRECTORY/info/alternates, 346 * whose contents is similar to that environment variable but can be 347 * LF separated. Its base points at a statically allocated buffer that 348 * contains "/the/directory/corresponding/to/.git/objects/...", while 349 * its name points just after the slash at the end of ".git/objects/" 350 * in the example above, and has enough space to hold 40-byte hex 351 * SHA1, an extra slash for the first level indirection, and the 352 * terminating NUL. 353 */ 354static intlink_alt_odb_entry(const char*entry,const char*relative_base, 355int depth,const char*normalized_objdir) 356{ 357struct alternate_object_database *ent; 358struct strbuf pathbuf = STRBUF_INIT; 359 360if(!is_absolute_path(entry) && relative_base) { 361strbuf_realpath(&pathbuf, relative_base,1); 362strbuf_addch(&pathbuf,'/'); 363} 364strbuf_addstr(&pathbuf, entry); 365 366if(strbuf_normalize_path(&pathbuf) <0&& relative_base) { 367error("unable to normalize alternate object path:%s", 368 pathbuf.buf); 369strbuf_release(&pathbuf); 370return-1; 371} 372 373/* 374 * The trailing slash after the directory name is given by 375 * this function at the end. Remove duplicates. 376 */ 377while(pathbuf.len && pathbuf.buf[pathbuf.len -1] =='/') 378strbuf_setlen(&pathbuf, pathbuf.len -1); 379 380if(!alt_odb_usable(&pathbuf, normalized_objdir)) { 381strbuf_release(&pathbuf); 382return-1; 383} 384 385 ent =alloc_alt_odb(pathbuf.buf); 386 387/* add the alternate entry */ 388*alt_odb_tail = ent; 389 alt_odb_tail = &(ent->next); 390 ent->next = NULL; 391 392/* recursively add alternates */ 393read_info_alternates(pathbuf.buf, depth +1); 394 395strbuf_release(&pathbuf); 396return0; 397} 398 399static const char*parse_alt_odb_entry(const char*string, 400int sep, 401struct strbuf *out) 402{ 403const char*end; 404 405strbuf_reset(out); 406 407if(*string =='#') { 408/* comment; consume up to next separator */ 409 end =strchrnul(string, sep); 410}else if(*string =='"'&& !unquote_c_style(out, string, &end)) { 411/* 412 * quoted path; unquote_c_style has copied the 413 * data for us and set "end". Broken quoting (e.g., 414 * an entry that doesn't end with a quote) falls 415 * back to the unquoted case below. 416 */ 417}else{ 418/* normal, unquoted path */ 419 end =strchrnul(string, sep); 420strbuf_add(out, string, end - string); 421} 422 423if(*end) 424 end++; 425return end; 426} 427 428static voidlink_alt_odb_entries(const char*alt,int len,int sep, 429const char*relative_base,int depth) 430{ 431struct strbuf objdirbuf = STRBUF_INIT; 432struct strbuf entry = STRBUF_INIT; 433 434if(depth >5) { 435error("%s: ignoring alternate object stores, nesting too deep.", 436 relative_base); 437return; 438} 439 440strbuf_add_absolute_path(&objdirbuf,get_object_directory()); 441if(strbuf_normalize_path(&objdirbuf) <0) 442die("unable to normalize object directory:%s", 443 objdirbuf.buf); 444 445while(*alt) { 446 alt =parse_alt_odb_entry(alt, sep, &entry); 447if(!entry.len) 448continue; 449link_alt_odb_entry(entry.buf, relative_base, depth, objdirbuf.buf); 450} 451strbuf_release(&entry); 452strbuf_release(&objdirbuf); 453} 454 455voidread_info_alternates(const char* relative_base,int depth) 456{ 457char*map; 458size_t mapsz; 459struct stat st; 460char*path; 461int fd; 462 463 path =xstrfmt("%s/info/alternates", relative_base); 464 fd =git_open(path); 465free(path); 466if(fd <0) 467return; 468if(fstat(fd, &st) || (st.st_size ==0)) { 469close(fd); 470return; 471} 472 mapsz =xsize_t(st.st_size); 473 map =xmmap(NULL, mapsz, PROT_READ, MAP_PRIVATE, fd,0); 474close(fd); 475 476link_alt_odb_entries(map, mapsz,'\n', relative_base, depth); 477 478munmap(map, mapsz); 479} 480 481struct alternate_object_database *alloc_alt_odb(const char*dir) 482{ 483struct alternate_object_database *ent; 484 485FLEX_ALLOC_STR(ent, path, dir); 486strbuf_init(&ent->scratch,0); 487strbuf_addf(&ent->scratch,"%s/", dir); 488 ent->base_len = ent->scratch.len; 489 490return ent; 491} 492 493voidadd_to_alternates_file(const char*reference) 494{ 495struct lock_file *lock =xcalloc(1,sizeof(struct lock_file)); 496char*alts =git_pathdup("objects/info/alternates"); 497FILE*in, *out; 498 499hold_lock_file_for_update(lock, alts, LOCK_DIE_ON_ERROR); 500 out =fdopen_lock_file(lock,"w"); 501if(!out) 502die_errno("unable to fdopen alternates lockfile"); 503 504 in =fopen(alts,"r"); 505if(in) { 506struct strbuf line = STRBUF_INIT; 507int found =0; 508 509while(strbuf_getline(&line, in) != EOF) { 510if(!strcmp(reference, line.buf)) { 511 found =1; 512break; 513} 514fprintf_or_die(out,"%s\n", line.buf); 515} 516 517strbuf_release(&line); 518fclose(in); 519 520if(found) { 521rollback_lock_file(lock); 522 lock = NULL; 523} 524} 525else if(errno != ENOENT) 526die_errno("unable to read alternates file"); 527 528if(lock) { 529fprintf_or_die(out,"%s\n", reference); 530if(commit_lock_file(lock)) 531die_errno("unable to move new alternates file into place"); 532if(alt_odb_tail) 533link_alt_odb_entries(reference,strlen(reference),'\n', NULL,0); 534} 535free(alts); 536} 537 538voidadd_to_alternates_memory(const char*reference) 539{ 540/* 541 * Make sure alternates are initialized, or else our entry may be 542 * overwritten when they are. 543 */ 544prepare_alt_odb(); 545 546link_alt_odb_entries(reference,strlen(reference),'\n', NULL,0); 547} 548 549/* 550 * Compute the exact path an alternate is at and returns it. In case of 551 * error NULL is returned and the human readable error is added to `err` 552 * `path` may be relative and should point to $GITDIR. 553 * `err` must not be null. 554 */ 555char*compute_alternate_path(const char*path,struct strbuf *err) 556{ 557char*ref_git = NULL; 558const char*repo, *ref_git_s; 559int seen_error =0; 560 561 ref_git_s =real_path_if_valid(path); 562if(!ref_git_s) { 563 seen_error =1; 564strbuf_addf(err,_("path '%s' does not exist"), path); 565goto out; 566}else 567/* 568 * Beware: read_gitfile(), real_path() and mkpath() 569 * return static buffer 570 */ 571 ref_git =xstrdup(ref_git_s); 572 573 repo =read_gitfile(ref_git); 574if(!repo) 575 repo =read_gitfile(mkpath("%s/.git", ref_git)); 576if(repo) { 577free(ref_git); 578 ref_git =xstrdup(repo); 579} 580 581if(!repo &&is_directory(mkpath("%s/.git/objects", ref_git))) { 582char*ref_git_git =mkpathdup("%s/.git", ref_git); 583free(ref_git); 584 ref_git = ref_git_git; 585}else if(!is_directory(mkpath("%s/objects", ref_git))) { 586struct strbuf sb = STRBUF_INIT; 587 seen_error =1; 588if(get_common_dir(&sb, ref_git)) { 589strbuf_addf(err, 590_("reference repository '%s' as a linked " 591"checkout is not supported yet."), 592 path); 593goto out; 594} 595 596strbuf_addf(err,_("reference repository '%s' is not a " 597"local repository."), path); 598goto out; 599} 600 601if(!access(mkpath("%s/shallow", ref_git), F_OK)) { 602strbuf_addf(err,_("reference repository '%s' is shallow"), 603 path); 604 seen_error =1; 605goto out; 606} 607 608if(!access(mkpath("%s/info/grafts", ref_git), F_OK)) { 609strbuf_addf(err, 610_("reference repository '%s' is grafted"), 611 path); 612 seen_error =1; 613goto out; 614} 615 616out: 617if(seen_error) { 618free(ref_git); 619 ref_git = NULL; 620} 621 622return ref_git; 623} 624 625intforeach_alt_odb(alt_odb_fn fn,void*cb) 626{ 627struct alternate_object_database *ent; 628int r =0; 629 630prepare_alt_odb(); 631for(ent = alt_odb_list; ent; ent = ent->next) { 632 r =fn(ent, cb); 633if(r) 634break; 635} 636return r; 637} 638 639voidprepare_alt_odb(void) 640{ 641const char*alt; 642 643if(alt_odb_tail) 644return; 645 646 alt =getenv(ALTERNATE_DB_ENVIRONMENT); 647if(!alt) alt =""; 648 649 alt_odb_tail = &alt_odb_list; 650link_alt_odb_entries(alt,strlen(alt), PATH_SEP, NULL,0); 651 652read_info_alternates(get_object_directory(),0); 653} 654 655/* Returns 1 if we have successfully freshened the file, 0 otherwise. */ 656static intfreshen_file(const char*fn) 657{ 658struct utimbuf t; 659 t.actime = t.modtime =time(NULL); 660return!utime(fn, &t); 661} 662 663/* 664 * All of the check_and_freshen functions return 1 if the file exists and was 665 * freshened (if freshening was requested), 0 otherwise. If they return 666 * 0, you should not assume that it is safe to skip a write of the object (it 667 * either does not exist on disk, or has a stale mtime and may be subject to 668 * pruning). 669 */ 670static intcheck_and_freshen_file(const char*fn,int freshen) 671{ 672if(access(fn, F_OK)) 673return0; 674if(freshen && !freshen_file(fn)) 675return0; 676return1; 677} 678 679static intcheck_and_freshen_local(const unsigned char*sha1,int freshen) 680{ 681returncheck_and_freshen_file(sha1_file_name(sha1), freshen); 682} 683 684static intcheck_and_freshen_nonlocal(const unsigned char*sha1,int freshen) 685{ 686struct alternate_object_database *alt; 687prepare_alt_odb(); 688for(alt = alt_odb_list; alt; alt = alt->next) { 689const char*path =alt_sha1_path(alt, sha1); 690if(check_and_freshen_file(path, freshen)) 691return1; 692} 693return0; 694} 695 696static intcheck_and_freshen(const unsigned char*sha1,int freshen) 697{ 698returncheck_and_freshen_local(sha1, freshen) || 699check_and_freshen_nonlocal(sha1, freshen); 700} 701 702inthas_loose_object_nonlocal(const unsigned char*sha1) 703{ 704returncheck_and_freshen_nonlocal(sha1,0); 705} 706 707static inthas_loose_object(const unsigned char*sha1) 708{ 709returncheck_and_freshen(sha1,0); 710} 711 712static unsigned int pack_used_ctr; 713static unsigned int pack_mmap_calls; 714static unsigned int peak_pack_open_windows; 715static unsigned int pack_open_windows; 716static unsigned int pack_open_fds; 717static unsigned int pack_max_fds; 718static size_t peak_pack_mapped; 719static size_t pack_mapped; 720struct packed_git *packed_git; 721 722static struct mru packed_git_mru_storage; 723struct mru *packed_git_mru = &packed_git_mru_storage; 724 725voidpack_report(void) 726{ 727fprintf(stderr, 728"pack_report: getpagesize() =%10" SZ_FMT "\n" 729"pack_report: core.packedGitWindowSize =%10" SZ_FMT "\n" 730"pack_report: core.packedGitLimit =%10" SZ_FMT "\n", 731sz_fmt(getpagesize()), 732sz_fmt(packed_git_window_size), 733sz_fmt(packed_git_limit)); 734fprintf(stderr, 735"pack_report: pack_used_ctr =%10u\n" 736"pack_report: pack_mmap_calls =%10u\n" 737"pack_report: pack_open_windows =%10u /%10u\n" 738"pack_report: pack_mapped = " 739"%10" SZ_FMT " /%10" SZ_FMT "\n", 740 pack_used_ctr, 741 pack_mmap_calls, 742 pack_open_windows, peak_pack_open_windows, 743sz_fmt(pack_mapped),sz_fmt(peak_pack_mapped)); 744} 745 746/* 747 * Open and mmap the index file at path, perform a couple of 748 * consistency checks, then record its information to p. Return 0 on 749 * success. 750 */ 751static intcheck_packed_git_idx(const char*path,struct packed_git *p) 752{ 753void*idx_map; 754struct pack_idx_header *hdr; 755size_t idx_size; 756uint32_t version, nr, i, *index; 757int fd =git_open(path); 758struct stat st; 759 760if(fd <0) 761return-1; 762if(fstat(fd, &st)) { 763close(fd); 764return-1; 765} 766 idx_size =xsize_t(st.st_size); 767if(idx_size <4*256+20+20) { 768close(fd); 769returnerror("index file%sis too small", path); 770} 771 idx_map =xmmap(NULL, idx_size, PROT_READ, MAP_PRIVATE, fd,0); 772close(fd); 773 774 hdr = idx_map; 775if(hdr->idx_signature ==htonl(PACK_IDX_SIGNATURE)) { 776 version =ntohl(hdr->idx_version); 777if(version <2|| version >2) { 778munmap(idx_map, idx_size); 779returnerror("index file%sis version %"PRIu32 780" and is not supported by this binary" 781" (try upgrading GIT to a newer version)", 782 path, version); 783} 784}else 785 version =1; 786 787 nr =0; 788 index = idx_map; 789if(version >1) 790 index +=2;/* skip index header */ 791for(i =0; i <256; i++) { 792uint32_t n =ntohl(index[i]); 793if(n < nr) { 794munmap(idx_map, idx_size); 795returnerror("non-monotonic index%s", path); 796} 797 nr = n; 798} 799 800if(version ==1) { 801/* 802 * Total size: 803 * - 256 index entries 4 bytes each 804 * - 24-byte entries * nr (20-byte sha1 + 4-byte offset) 805 * - 20-byte SHA1 of the packfile 806 * - 20-byte SHA1 file checksum 807 */ 808if(idx_size !=4*256+ nr *24+20+20) { 809munmap(idx_map, idx_size); 810returnerror("wrong index v1 file size in%s", path); 811} 812}else if(version ==2) { 813/* 814 * Minimum size: 815 * - 8 bytes of header 816 * - 256 index entries 4 bytes each 817 * - 20-byte sha1 entry * nr 818 * - 4-byte crc entry * nr 819 * - 4-byte offset entry * nr 820 * - 20-byte SHA1 of the packfile 821 * - 20-byte SHA1 file checksum 822 * And after the 4-byte offset table might be a 823 * variable sized table containing 8-byte entries 824 * for offsets larger than 2^31. 825 */ 826unsigned long min_size =8+4*256+ nr*(20+4+4) +20+20; 827unsigned long max_size = min_size; 828if(nr) 829 max_size += (nr -1)*8; 830if(idx_size < min_size || idx_size > max_size) { 831munmap(idx_map, idx_size); 832returnerror("wrong index v2 file size in%s", path); 833} 834if(idx_size != min_size && 835/* 836 * make sure we can deal with large pack offsets. 837 * 31-bit signed offset won't be enough, neither 838 * 32-bit unsigned one will be. 839 */ 840(sizeof(off_t) <=4)) { 841munmap(idx_map, idx_size); 842returnerror("pack too large for current definition of off_t in%s", path); 843} 844} 845 846 p->index_version = version; 847 p->index_data = idx_map; 848 p->index_size = idx_size; 849 p->num_objects = nr; 850return0; 851} 852 853intopen_pack_index(struct packed_git *p) 854{ 855char*idx_name; 856size_t len; 857int ret; 858 859if(p->index_data) 860return0; 861 862if(!strip_suffix(p->pack_name,".pack", &len)) 863die("BUG: pack_name does not end in .pack"); 864 idx_name =xstrfmt("%.*s.idx", (int)len, p->pack_name); 865 ret =check_packed_git_idx(idx_name, p); 866free(idx_name); 867return ret; 868} 869 870static voidscan_windows(struct packed_git *p, 871struct packed_git **lru_p, 872struct pack_window **lru_w, 873struct pack_window **lru_l) 874{ 875struct pack_window *w, *w_l; 876 877for(w_l = NULL, w = p->windows; w; w = w->next) { 878if(!w->inuse_cnt) { 879if(!*lru_w || w->last_used < (*lru_w)->last_used) { 880*lru_p = p; 881*lru_w = w; 882*lru_l = w_l; 883} 884} 885 w_l = w; 886} 887} 888 889static intunuse_one_window(struct packed_git *current) 890{ 891struct packed_git *p, *lru_p = NULL; 892struct pack_window *lru_w = NULL, *lru_l = NULL; 893 894if(current) 895scan_windows(current, &lru_p, &lru_w, &lru_l); 896for(p = packed_git; p; p = p->next) 897scan_windows(p, &lru_p, &lru_w, &lru_l); 898if(lru_p) { 899munmap(lru_w->base, lru_w->len); 900 pack_mapped -= lru_w->len; 901if(lru_l) 902 lru_l->next = lru_w->next; 903else 904 lru_p->windows = lru_w->next; 905free(lru_w); 906 pack_open_windows--; 907return1; 908} 909return0; 910} 911 912voidrelease_pack_memory(size_t need) 913{ 914size_t cur = pack_mapped; 915while(need >= (cur - pack_mapped) &&unuse_one_window(NULL)) 916;/* nothing */ 917} 918 919static voidmmap_limit_check(size_t length) 920{ 921static size_t limit =0; 922if(!limit) { 923 limit =git_env_ulong("GIT_MMAP_LIMIT",0); 924if(!limit) 925 limit = SIZE_MAX; 926} 927if(length > limit) 928die("attempting to mmap %"PRIuMAX" over limit %"PRIuMAX, 929(uintmax_t)length, (uintmax_t)limit); 930} 931 932void*xmmap_gently(void*start,size_t length, 933int prot,int flags,int fd, off_t offset) 934{ 935void*ret; 936 937mmap_limit_check(length); 938 ret =mmap(start, length, prot, flags, fd, offset); 939if(ret == MAP_FAILED) { 940if(!length) 941return NULL; 942release_pack_memory(length); 943 ret =mmap(start, length, prot, flags, fd, offset); 944} 945return ret; 946} 947 948void*xmmap(void*start,size_t length, 949int prot,int flags,int fd, off_t offset) 950{ 951void*ret =xmmap_gently(start, length, prot, flags, fd, offset); 952if(ret == MAP_FAILED) 953die_errno("mmap failed"); 954return ret; 955} 956 957voidclose_pack_windows(struct packed_git *p) 958{ 959while(p->windows) { 960struct pack_window *w = p->windows; 961 962if(w->inuse_cnt) 963die("pack '%s' still has open windows to it", 964 p->pack_name); 965munmap(w->base, w->len); 966 pack_mapped -= w->len; 967 pack_open_windows--; 968 p->windows = w->next; 969free(w); 970} 971} 972 973static intclose_pack_fd(struct packed_git *p) 974{ 975if(p->pack_fd <0) 976return0; 977 978close(p->pack_fd); 979 pack_open_fds--; 980 p->pack_fd = -1; 981 982return1; 983} 984 985static voidclose_pack(struct packed_git *p) 986{ 987close_pack_windows(p); 988close_pack_fd(p); 989close_pack_index(p); 990} 991 992voidclose_all_packs(void) 993{ 994struct packed_git *p; 995 996for(p = packed_git; p; p = p->next) 997if(p->do_not_close) 998die("BUG: want to close pack marked 'do-not-close'"); 999else1000close_pack(p);1001}100210031004/*1005 * The LRU pack is the one with the oldest MRU window, preferring packs1006 * with no used windows, or the oldest mtime if it has no windows allocated.1007 */1008static voidfind_lru_pack(struct packed_git *p,struct packed_git **lru_p,struct pack_window **mru_w,int*accept_windows_inuse)1009{1010struct pack_window *w, *this_mru_w;1011int has_windows_inuse =0;10121013/*1014 * Reject this pack if it has windows and the previously selected1015 * one does not. If this pack does not have windows, reject1016 * it if the pack file is newer than the previously selected one.1017 */1018if(*lru_p && !*mru_w && (p->windows || p->mtime > (*lru_p)->mtime))1019return;10201021for(w = this_mru_w = p->windows; w; w = w->next) {1022/*1023 * Reject this pack if any of its windows are in use,1024 * but the previously selected pack did not have any1025 * inuse windows. Otherwise, record that this pack1026 * has windows in use.1027 */1028if(w->inuse_cnt) {1029if(*accept_windows_inuse)1030 has_windows_inuse =1;1031else1032return;1033}10341035if(w->last_used > this_mru_w->last_used)1036 this_mru_w = w;10371038/*1039 * Reject this pack if it has windows that have been1040 * used more recently than the previously selected pack.1041 * If the previously selected pack had windows inuse and1042 * we have not encountered a window in this pack that is1043 * inuse, skip this check since we prefer a pack with no1044 * inuse windows to one that has inuse windows.1045 */1046if(*mru_w && *accept_windows_inuse == has_windows_inuse &&1047 this_mru_w->last_used > (*mru_w)->last_used)1048return;1049}10501051/*1052 * Select this pack.1053 */1054*mru_w = this_mru_w;1055*lru_p = p;1056*accept_windows_inuse = has_windows_inuse;1057}10581059static intclose_one_pack(void)1060{1061struct packed_git *p, *lru_p = NULL;1062struct pack_window *mru_w = NULL;1063int accept_windows_inuse =1;10641065for(p = packed_git; p; p = p->next) {1066if(p->pack_fd == -1)1067continue;1068find_lru_pack(p, &lru_p, &mru_w, &accept_windows_inuse);1069}10701071if(lru_p)1072returnclose_pack_fd(lru_p);10731074return0;1075}10761077voidunuse_pack(struct pack_window **w_cursor)1078{1079struct pack_window *w = *w_cursor;1080if(w) {1081 w->inuse_cnt--;1082*w_cursor = NULL;1083}1084}10851086voidclose_pack_index(struct packed_git *p)1087{1088if(p->index_data) {1089munmap((void*)p->index_data, p->index_size);1090 p->index_data = NULL;1091}1092}10931094static unsigned intget_max_fd_limit(void)1095{1096#ifdef RLIMIT_NOFILE1097{1098struct rlimit lim;10991100if(!getrlimit(RLIMIT_NOFILE, &lim))1101return lim.rlim_cur;1102}1103#endif11041105#ifdef _SC_OPEN_MAX1106{1107long open_max =sysconf(_SC_OPEN_MAX);1108if(0< open_max)1109return open_max;1110/*1111 * Otherwise, we got -1 for one of the two1112 * reasons:1113 *1114 * (1) sysconf() did not understand _SC_OPEN_MAX1115 * and signaled an error with -1; or1116 * (2) sysconf() said there is no limit.1117 *1118 * We _could_ clear errno before calling sysconf() to1119 * tell these two cases apart and return a huge number1120 * in the latter case to let the caller cap it to a1121 * value that is not so selfish, but letting the1122 * fallback OPEN_MAX codepath take care of these cases1123 * is a lot simpler.1124 */1125}1126#endif11271128#ifdef OPEN_MAX1129return OPEN_MAX;1130#else1131return1;/* see the caller ;-) */1132#endif1133}11341135/*1136 * Do not call this directly as this leaks p->pack_fd on error return;1137 * call open_packed_git() instead.1138 */1139static intopen_packed_git_1(struct packed_git *p)1140{1141struct stat st;1142struct pack_header hdr;1143unsigned char sha1[20];1144unsigned char*idx_sha1;1145long fd_flag;11461147if(!p->index_data &&open_pack_index(p))1148returnerror("packfile%sindex unavailable", p->pack_name);11491150if(!pack_max_fds) {1151unsigned int max_fds =get_max_fd_limit();11521153/* Save 3 for stdin/stdout/stderr, 22 for work */1154if(25< max_fds)1155 pack_max_fds = max_fds -25;1156else1157 pack_max_fds =1;1158}11591160while(pack_max_fds <= pack_open_fds &&close_one_pack())1161;/* nothing */11621163 p->pack_fd =git_open(p->pack_name);1164if(p->pack_fd <0||fstat(p->pack_fd, &st))1165return-1;1166 pack_open_fds++;11671168/* If we created the struct before we had the pack we lack size. */1169if(!p->pack_size) {1170if(!S_ISREG(st.st_mode))1171returnerror("packfile%snot a regular file", p->pack_name);1172 p->pack_size = st.st_size;1173}else if(p->pack_size != st.st_size)1174returnerror("packfile%ssize changed", p->pack_name);11751176/* We leave these file descriptors open with sliding mmap;1177 * there is no point keeping them open across exec(), though.1178 */1179 fd_flag =fcntl(p->pack_fd, F_GETFD,0);1180if(fd_flag <0)1181returnerror("cannot determine file descriptor flags");1182 fd_flag |= FD_CLOEXEC;1183if(fcntl(p->pack_fd, F_SETFD, fd_flag) == -1)1184returnerror("cannot set FD_CLOEXEC");11851186/* Verify we recognize this pack file format. */1187if(read_in_full(p->pack_fd, &hdr,sizeof(hdr)) !=sizeof(hdr))1188returnerror("file%sis far too short to be a packfile", p->pack_name);1189if(hdr.hdr_signature !=htonl(PACK_SIGNATURE))1190returnerror("file%sis not a GIT packfile", p->pack_name);1191if(!pack_version_ok(hdr.hdr_version))1192returnerror("packfile%sis version %"PRIu32" and not"1193" supported (try upgrading GIT to a newer version)",1194 p->pack_name,ntohl(hdr.hdr_version));11951196/* Verify the pack matches its index. */1197if(p->num_objects !=ntohl(hdr.hdr_entries))1198returnerror("packfile%sclaims to have %"PRIu32" objects"1199" while index indicates %"PRIu32" objects",1200 p->pack_name,ntohl(hdr.hdr_entries),1201 p->num_objects);1202if(lseek(p->pack_fd, p->pack_size -sizeof(sha1), SEEK_SET) == -1)1203returnerror("end of packfile%sis unavailable", p->pack_name);1204if(read_in_full(p->pack_fd, sha1,sizeof(sha1)) !=sizeof(sha1))1205returnerror("packfile%ssignature is unavailable", p->pack_name);1206 idx_sha1 = ((unsigned char*)p->index_data) + p->index_size -40;1207if(hashcmp(sha1, idx_sha1))1208returnerror("packfile%sdoes not match index", p->pack_name);1209return0;1210}12111212static intopen_packed_git(struct packed_git *p)1213{1214if(!open_packed_git_1(p))1215return0;1216close_pack_fd(p);1217return-1;1218}12191220static intin_window(struct pack_window *win, off_t offset)1221{1222/* We must promise at least 20 bytes (one hash) after the1223 * offset is available from this window, otherwise the offset1224 * is not actually in this window and a different window (which1225 * has that one hash excess) must be used. This is to support1226 * the object header and delta base parsing routines below.1227 */1228 off_t win_off = win->offset;1229return win_off <= offset1230&& (offset +20) <= (win_off + win->len);1231}12321233unsigned char*use_pack(struct packed_git *p,1234struct pack_window **w_cursor,1235 off_t offset,1236unsigned long*left)1237{1238struct pack_window *win = *w_cursor;12391240/* Since packfiles end in a hash of their content and it's1241 * pointless to ask for an offset into the middle of that1242 * hash, and the in_window function above wouldn't match1243 * don't allow an offset too close to the end of the file.1244 */1245if(!p->pack_size && p->pack_fd == -1&&open_packed_git(p))1246die("packfile%scannot be accessed", p->pack_name);1247if(offset > (p->pack_size -20))1248die("offset beyond end of packfile (truncated pack?)");1249if(offset <0)1250die(_("offset before end of packfile (broken .idx?)"));12511252if(!win || !in_window(win, offset)) {1253if(win)1254 win->inuse_cnt--;1255for(win = p->windows; win; win = win->next) {1256if(in_window(win, offset))1257break;1258}1259if(!win) {1260size_t window_align = packed_git_window_size /2;1261 off_t len;12621263if(p->pack_fd == -1&&open_packed_git(p))1264die("packfile%scannot be accessed", p->pack_name);12651266 win =xcalloc(1,sizeof(*win));1267 win->offset = (offset / window_align) * window_align;1268 len = p->pack_size - win->offset;1269if(len > packed_git_window_size)1270 len = packed_git_window_size;1271 win->len = (size_t)len;1272 pack_mapped += win->len;1273while(packed_git_limit < pack_mapped1274&&unuse_one_window(p))1275;/* nothing */1276 win->base =xmmap(NULL, win->len,1277 PROT_READ, MAP_PRIVATE,1278 p->pack_fd, win->offset);1279if(win->base == MAP_FAILED)1280die_errno("packfile%scannot be mapped",1281 p->pack_name);1282if(!win->offset && win->len == p->pack_size1283&& !p->do_not_close)1284close_pack_fd(p);1285 pack_mmap_calls++;1286 pack_open_windows++;1287if(pack_mapped > peak_pack_mapped)1288 peak_pack_mapped = pack_mapped;1289if(pack_open_windows > peak_pack_open_windows)1290 peak_pack_open_windows = pack_open_windows;1291 win->next = p->windows;1292 p->windows = win;1293}1294}1295if(win != *w_cursor) {1296 win->last_used = pack_used_ctr++;1297 win->inuse_cnt++;1298*w_cursor = win;1299}1300 offset -= win->offset;1301if(left)1302*left = win->len -xsize_t(offset);1303return win->base + offset;1304}13051306static struct packed_git *alloc_packed_git(int extra)1307{1308struct packed_git *p =xmalloc(st_add(sizeof(*p), extra));1309memset(p,0,sizeof(*p));1310 p->pack_fd = -1;1311return p;1312}13131314static voidtry_to_free_pack_memory(size_t size)1315{1316release_pack_memory(size);1317}13181319struct packed_git *add_packed_git(const char*path,size_t path_len,int local)1320{1321static int have_set_try_to_free_routine;1322struct stat st;1323size_t alloc;1324struct packed_git *p;13251326if(!have_set_try_to_free_routine) {1327 have_set_try_to_free_routine =1;1328set_try_to_free_routine(try_to_free_pack_memory);1329}13301331/*1332 * Make sure a corresponding .pack file exists and that1333 * the index looks sane.1334 */1335if(!strip_suffix_mem(path, &path_len,".idx"))1336return NULL;13371338/*1339 * ".pack" is long enough to hold any suffix we're adding (and1340 * the use xsnprintf double-checks that)1341 */1342 alloc =st_add3(path_len,strlen(".pack"),1);1343 p =alloc_packed_git(alloc);1344memcpy(p->pack_name, path, path_len);13451346xsnprintf(p->pack_name + path_len, alloc - path_len,".keep");1347if(!access(p->pack_name, F_OK))1348 p->pack_keep =1;13491350xsnprintf(p->pack_name + path_len, alloc - path_len,".pack");1351if(stat(p->pack_name, &st) || !S_ISREG(st.st_mode)) {1352free(p);1353return NULL;1354}13551356/* ok, it looks sane as far as we can check without1357 * actually mapping the pack file.1358 */1359 p->pack_size = st.st_size;1360 p->pack_local = local;1361 p->mtime = st.st_mtime;1362if(path_len <40||get_sha1_hex(path + path_len -40, p->sha1))1363hashclr(p->sha1);1364return p;1365}13661367struct packed_git *parse_pack_index(unsigned char*sha1,const char*idx_path)1368{1369const char*path =sha1_pack_name(sha1);1370size_t alloc =st_add(strlen(path),1);1371struct packed_git *p =alloc_packed_git(alloc);13721373memcpy(p->pack_name, path, alloc);/* includes NUL */1374hashcpy(p->sha1, sha1);1375if(check_packed_git_idx(idx_path, p)) {1376free(p);1377return NULL;1378}13791380return p;1381}13821383voidinstall_packed_git(struct packed_git *pack)1384{1385if(pack->pack_fd != -1)1386 pack_open_fds++;13871388 pack->next = packed_git;1389 packed_git = pack;1390}13911392void(*report_garbage)(unsigned seen_bits,const char*path);13931394static voidreport_helper(const struct string_list *list,1395int seen_bits,int first,int last)1396{1397if(seen_bits == (PACKDIR_FILE_PACK|PACKDIR_FILE_IDX))1398return;13991400for(; first < last; first++)1401report_garbage(seen_bits, list->items[first].string);1402}14031404static voidreport_pack_garbage(struct string_list *list)1405{1406int i, baselen = -1, first =0, seen_bits =0;14071408if(!report_garbage)1409return;14101411string_list_sort(list);14121413for(i =0; i < list->nr; i++) {1414const char*path = list->items[i].string;1415if(baselen != -1&&1416strncmp(path, list->items[first].string, baselen)) {1417report_helper(list, seen_bits, first, i);1418 baselen = -1;1419 seen_bits =0;1420}1421if(baselen == -1) {1422const char*dot =strrchr(path,'.');1423if(!dot) {1424report_garbage(PACKDIR_FILE_GARBAGE, path);1425continue;1426}1427 baselen = dot - path +1;1428 first = i;1429}1430if(!strcmp(path + baselen,"pack"))1431 seen_bits |=1;1432else if(!strcmp(path + baselen,"idx"))1433 seen_bits |=2;1434}1435report_helper(list, seen_bits, first, list->nr);1436}14371438static voidprepare_packed_git_one(char*objdir,int local)1439{1440struct strbuf path = STRBUF_INIT;1441size_t dirnamelen;1442DIR*dir;1443struct dirent *de;1444struct string_list garbage = STRING_LIST_INIT_DUP;14451446strbuf_addstr(&path, objdir);1447strbuf_addstr(&path,"/pack");1448 dir =opendir(path.buf);1449if(!dir) {1450if(errno != ENOENT)1451error_errno("unable to open object pack directory:%s",1452 path.buf);1453strbuf_release(&path);1454return;1455}1456strbuf_addch(&path,'/');1457 dirnamelen = path.len;1458while((de =readdir(dir)) != NULL) {1459struct packed_git *p;1460size_t base_len;14611462if(is_dot_or_dotdot(de->d_name))1463continue;14641465strbuf_setlen(&path, dirnamelen);1466strbuf_addstr(&path, de->d_name);14671468 base_len = path.len;1469if(strip_suffix_mem(path.buf, &base_len,".idx")) {1470/* Don't reopen a pack we already have. */1471for(p = packed_git; p; p = p->next) {1472size_t len;1473if(strip_suffix(p->pack_name,".pack", &len) &&1474 len == base_len &&1475!memcmp(p->pack_name, path.buf, len))1476break;1477}1478if(p == NULL &&1479/*1480 * See if it really is a valid .idx file with1481 * corresponding .pack file that we can map.1482 */1483(p =add_packed_git(path.buf, path.len, local)) != NULL)1484install_packed_git(p);1485}14861487if(!report_garbage)1488continue;14891490if(ends_with(de->d_name,".idx") ||1491ends_with(de->d_name,".pack") ||1492ends_with(de->d_name,".bitmap") ||1493ends_with(de->d_name,".keep"))1494string_list_append(&garbage, path.buf);1495else1496report_garbage(PACKDIR_FILE_GARBAGE, path.buf);1497}1498closedir(dir);1499report_pack_garbage(&garbage);1500string_list_clear(&garbage,0);1501strbuf_release(&path);1502}15031504static int approximate_object_count_valid;15051506/*1507 * Give a fast, rough count of the number of objects in the repository. This1508 * ignores loose objects completely. If you have a lot of them, then either1509 * you should repack because your performance will be awful, or they are1510 * all unreachable objects about to be pruned, in which case they're not really1511 * interesting as a measure of repo size in the first place.1512 */1513unsigned longapproximate_object_count(void)1514{1515static unsigned long count;1516if(!approximate_object_count_valid) {1517struct packed_git *p;15181519prepare_packed_git();1520 count =0;1521for(p = packed_git; p; p = p->next) {1522if(open_pack_index(p))1523continue;1524 count += p->num_objects;1525}1526}1527return count;1528}15291530static void*get_next_packed_git(const void*p)1531{1532return((const struct packed_git *)p)->next;1533}15341535static voidset_next_packed_git(void*p,void*next)1536{1537((struct packed_git *)p)->next = next;1538}15391540static intsort_pack(const void*a_,const void*b_)1541{1542const struct packed_git *a = a_;1543const struct packed_git *b = b_;1544int st;15451546/*1547 * Local packs tend to contain objects specific to our1548 * variant of the project than remote ones. In addition,1549 * remote ones could be on a network mounted filesystem.1550 * Favor local ones for these reasons.1551 */1552 st = a->pack_local - b->pack_local;1553if(st)1554return-st;15551556/*1557 * Younger packs tend to contain more recent objects,1558 * and more recent objects tend to get accessed more1559 * often.1560 */1561if(a->mtime < b->mtime)1562return1;1563else if(a->mtime == b->mtime)1564return0;1565return-1;1566}15671568static voidrearrange_packed_git(void)1569{1570 packed_git =llist_mergesort(packed_git, get_next_packed_git,1571 set_next_packed_git, sort_pack);1572}15731574static voidprepare_packed_git_mru(void)1575{1576struct packed_git *p;15771578mru_clear(packed_git_mru);1579for(p = packed_git; p; p = p->next)1580mru_append(packed_git_mru, p);1581}15821583static int prepare_packed_git_run_once =0;1584voidprepare_packed_git(void)1585{1586struct alternate_object_database *alt;15871588if(prepare_packed_git_run_once)1589return;1590prepare_packed_git_one(get_object_directory(),1);1591prepare_alt_odb();1592for(alt = alt_odb_list; alt; alt = alt->next)1593prepare_packed_git_one(alt->path,0);1594rearrange_packed_git();1595prepare_packed_git_mru();1596 prepare_packed_git_run_once =1;1597}15981599voidreprepare_packed_git(void)1600{1601 approximate_object_count_valid =0;1602 prepare_packed_git_run_once =0;1603prepare_packed_git();1604}16051606static voidmark_bad_packed_object(struct packed_git *p,1607const unsigned char*sha1)1608{1609unsigned i;1610for(i =0; i < p->num_bad_objects; i++)1611if(!hashcmp(sha1, p->bad_object_sha1 + GIT_SHA1_RAWSZ * i))1612return;1613 p->bad_object_sha1 =xrealloc(p->bad_object_sha1,1614st_mult(GIT_SHA1_RAWSZ,1615st_add(p->num_bad_objects,1)));1616hashcpy(p->bad_object_sha1 + GIT_SHA1_RAWSZ * p->num_bad_objects, sha1);1617 p->num_bad_objects++;1618}16191620static const struct packed_git *has_packed_and_bad(const unsigned char*sha1)1621{1622struct packed_git *p;1623unsigned i;16241625for(p = packed_git; p; p = p->next)1626for(i =0; i < p->num_bad_objects; i++)1627if(!hashcmp(sha1, p->bad_object_sha1 +20* i))1628return p;1629return NULL;1630}16311632/*1633 * With an in-core object data in "map", rehash it to make sure the1634 * object name actually matches "sha1" to detect object corruption.1635 * With "map" == NULL, try reading the object named with "sha1" using1636 * the streaming interface and rehash it to do the same.1637 */1638intcheck_sha1_signature(const unsigned char*sha1,void*map,1639unsigned long size,const char*type)1640{1641unsigned char real_sha1[20];1642enum object_type obj_type;1643struct git_istream *st;1644 git_SHA_CTX c;1645char hdr[32];1646int hdrlen;16471648if(map) {1649hash_sha1_file(map, size, type, real_sha1);1650returnhashcmp(sha1, real_sha1) ? -1:0;1651}16521653 st =open_istream(sha1, &obj_type, &size, NULL);1654if(!st)1655return-1;16561657/* Generate the header */1658 hdrlen =xsnprintf(hdr,sizeof(hdr),"%s %lu",typename(obj_type), size) +1;16591660/* Sha1.. */1661git_SHA1_Init(&c);1662git_SHA1_Update(&c, hdr, hdrlen);1663for(;;) {1664char buf[1024*16];1665 ssize_t readlen =read_istream(st, buf,sizeof(buf));16661667if(readlen <0) {1668close_istream(st);1669return-1;1670}1671if(!readlen)1672break;1673git_SHA1_Update(&c, buf, readlen);1674}1675git_SHA1_Final(real_sha1, &c);1676close_istream(st);1677returnhashcmp(sha1, real_sha1) ? -1:0;1678}16791680intgit_open_cloexec(const char*name,int flags)1681{1682int fd;1683static int o_cloexec = O_CLOEXEC;16841685 fd =open(name, flags | o_cloexec);1686if((o_cloexec & O_CLOEXEC) && fd <0&& errno == EINVAL) {1687/* Try again w/o O_CLOEXEC: the kernel might not support it */1688 o_cloexec &= ~O_CLOEXEC;1689 fd =open(name, flags | o_cloexec);1690}16911692#if defined(F_GETFL) && defined(F_SETFL) && defined(FD_CLOEXEC)1693{1694static int fd_cloexec = FD_CLOEXEC;16951696if(!o_cloexec &&0<= fd && fd_cloexec) {1697/* Opened w/o O_CLOEXEC? try with fcntl(2) to add it */1698int flags =fcntl(fd, F_GETFL);1699if(fcntl(fd, F_SETFL, flags | fd_cloexec))1700 fd_cloexec =0;1701}1702}1703#endif1704return fd;1705}17061707/*1708 * Find "sha1" as a loose object in the local repository or in an alternate.1709 * Returns 0 on success, negative on failure.1710 *1711 * The "path" out-parameter will give the path of the object we found (if any).1712 * Note that it may point to static storage and is only valid until another1713 * call to sha1_file_name(), etc.1714 */1715static intstat_sha1_file(const unsigned char*sha1,struct stat *st,1716const char**path)1717{1718struct alternate_object_database *alt;17191720*path =sha1_file_name(sha1);1721if(!lstat(*path, st))1722return0;17231724prepare_alt_odb();1725 errno = ENOENT;1726for(alt = alt_odb_list; alt; alt = alt->next) {1727*path =alt_sha1_path(alt, sha1);1728if(!lstat(*path, st))1729return0;1730}17311732return-1;1733}17341735/*1736 * Like stat_sha1_file(), but actually open the object and return the1737 * descriptor. See the caveats on the "path" parameter above.1738 */1739static intopen_sha1_file(const unsigned char*sha1,const char**path)1740{1741int fd;1742struct alternate_object_database *alt;1743int most_interesting_errno;17441745*path =sha1_file_name(sha1);1746 fd =git_open(*path);1747if(fd >=0)1748return fd;1749 most_interesting_errno = errno;17501751prepare_alt_odb();1752for(alt = alt_odb_list; alt; alt = alt->next) {1753*path =alt_sha1_path(alt, sha1);1754 fd =git_open(*path);1755if(fd >=0)1756return fd;1757if(most_interesting_errno == ENOENT)1758 most_interesting_errno = errno;1759}1760 errno = most_interesting_errno;1761return-1;1762}17631764/*1765 * Map the loose object at "path" if it is not NULL, or the path found by1766 * searching for a loose object named "sha1".1767 */1768static void*map_sha1_file_1(const char*path,1769const unsigned char*sha1,1770unsigned long*size)1771{1772void*map;1773int fd;17741775if(path)1776 fd =git_open(path);1777else1778 fd =open_sha1_file(sha1, &path);1779 map = NULL;1780if(fd >=0) {1781struct stat st;17821783if(!fstat(fd, &st)) {1784*size =xsize_t(st.st_size);1785if(!*size) {1786/* mmap() is forbidden on empty files */1787error("object file%sis empty", path);1788return NULL;1789}1790 map =xmmap(NULL, *size, PROT_READ, MAP_PRIVATE, fd,0);1791}1792close(fd);1793}1794return map;1795}17961797void*map_sha1_file(const unsigned char*sha1,unsigned long*size)1798{1799returnmap_sha1_file_1(NULL, sha1, size);1800}18011802unsigned longunpack_object_header_buffer(const unsigned char*buf,1803unsigned long len,enum object_type *type,unsigned long*sizep)1804{1805unsigned shift;1806unsigned long size, c;1807unsigned long used =0;18081809 c = buf[used++];1810*type = (c >>4) &7;1811 size = c &15;1812 shift =4;1813while(c &0x80) {1814if(len <= used ||bitsizeof(long) <= shift) {1815error("bad object header");1816 size = used =0;1817break;1818}1819 c = buf[used++];1820 size += (c &0x7f) << shift;1821 shift +=7;1822}1823*sizep = size;1824return used;1825}18261827static intunpack_sha1_short_header(git_zstream *stream,1828unsigned char*map,unsigned long mapsize,1829void*buffer,unsigned long bufsiz)1830{1831/* Get the data stream */1832memset(stream,0,sizeof(*stream));1833 stream->next_in = map;1834 stream->avail_in = mapsize;1835 stream->next_out = buffer;1836 stream->avail_out = bufsiz;18371838git_inflate_init(stream);1839returngit_inflate(stream,0);1840}18411842intunpack_sha1_header(git_zstream *stream,1843unsigned char*map,unsigned long mapsize,1844void*buffer,unsigned long bufsiz)1845{1846int status =unpack_sha1_short_header(stream, map, mapsize,1847 buffer, bufsiz);18481849if(status < Z_OK)1850return status;18511852/* Make sure we have the terminating NUL */1853if(!memchr(buffer,'\0', stream->next_out - (unsigned char*)buffer))1854return-1;1855return0;1856}18571858static intunpack_sha1_header_to_strbuf(git_zstream *stream,unsigned char*map,1859unsigned long mapsize,void*buffer,1860unsigned long bufsiz,struct strbuf *header)1861{1862int status;18631864 status =unpack_sha1_short_header(stream, map, mapsize, buffer, bufsiz);1865if(status < Z_OK)1866return-1;18671868/*1869 * Check if entire header is unpacked in the first iteration.1870 */1871if(memchr(buffer,'\0', stream->next_out - (unsigned char*)buffer))1872return0;18731874/*1875 * buffer[0..bufsiz] was not large enough. Copy the partial1876 * result out to header, and then append the result of further1877 * reading the stream.1878 */1879strbuf_add(header, buffer, stream->next_out - (unsigned char*)buffer);1880 stream->next_out = buffer;1881 stream->avail_out = bufsiz;18821883do{1884 status =git_inflate(stream,0);1885strbuf_add(header, buffer, stream->next_out - (unsigned char*)buffer);1886if(memchr(buffer,'\0', stream->next_out - (unsigned char*)buffer))1887return0;1888 stream->next_out = buffer;1889 stream->avail_out = bufsiz;1890}while(status != Z_STREAM_END);1891return-1;1892}18931894static void*unpack_sha1_rest(git_zstream *stream,void*buffer,unsigned long size,const unsigned char*sha1)1895{1896int bytes =strlen(buffer) +1;1897unsigned char*buf =xmallocz(size);1898unsigned long n;1899int status = Z_OK;19001901 n = stream->total_out - bytes;1902if(n > size)1903 n = size;1904memcpy(buf, (char*) buffer + bytes, n);1905 bytes = n;1906if(bytes <= size) {1907/*1908 * The above condition must be (bytes <= size), not1909 * (bytes < size). In other words, even though we1910 * expect no more output and set avail_out to zero,1911 * the input zlib stream may have bytes that express1912 * "this concludes the stream", and we *do* want to1913 * eat that input.1914 *1915 * Otherwise we would not be able to test that we1916 * consumed all the input to reach the expected size;1917 * we also want to check that zlib tells us that all1918 * went well with status == Z_STREAM_END at the end.1919 */1920 stream->next_out = buf + bytes;1921 stream->avail_out = size - bytes;1922while(status == Z_OK)1923 status =git_inflate(stream, Z_FINISH);1924}1925if(status == Z_STREAM_END && !stream->avail_in) {1926git_inflate_end(stream);1927return buf;1928}19291930if(status <0)1931error("corrupt loose object '%s'",sha1_to_hex(sha1));1932else if(stream->avail_in)1933error("garbage at end of loose object '%s'",1934sha1_to_hex(sha1));1935free(buf);1936return NULL;1937}19381939/*1940 * We used to just use "sscanf()", but that's actually way1941 * too permissive for what we want to check. So do an anal1942 * object header parse by hand.1943 */1944static intparse_sha1_header_extended(const char*hdr,struct object_info *oi,1945unsigned int flags)1946{1947const char*type_buf = hdr;1948unsigned long size;1949int type, type_len =0;19501951/*1952 * The type can be of any size but is followed by1953 * a space.1954 */1955for(;;) {1956char c = *hdr++;1957if(!c)1958return-1;1959if(c ==' ')1960break;1961 type_len++;1962}19631964 type =type_from_string_gently(type_buf, type_len,1);1965if(oi->typename)1966strbuf_add(oi->typename, type_buf, type_len);1967/*1968 * Set type to 0 if its an unknown object and1969 * we're obtaining the type using '--allow-unknown-type'1970 * option.1971 */1972if((flags & LOOKUP_UNKNOWN_OBJECT) && (type <0))1973 type =0;1974else if(type <0)1975die("invalid object type");1976if(oi->typep)1977*oi->typep = type;19781979/*1980 * The length must follow immediately, and be in canonical1981 * decimal format (ie "010" is not valid).1982 */1983 size = *hdr++ -'0';1984if(size >9)1985return-1;1986if(size) {1987for(;;) {1988unsigned long c = *hdr -'0';1989if(c >9)1990break;1991 hdr++;1992 size = size *10+ c;1993}1994}19951996if(oi->sizep)1997*oi->sizep = size;19981999/*2000 * The length must be followed by a zero byte2001 */2002return*hdr ? -1: type;2003}20042005intparse_sha1_header(const char*hdr,unsigned long*sizep)2006{2007struct object_info oi = OBJECT_INFO_INIT;20082009 oi.sizep = sizep;2010returnparse_sha1_header_extended(hdr, &oi, LOOKUP_REPLACE_OBJECT);2011}20122013static void*unpack_sha1_file(void*map,unsigned long mapsize,enum object_type *type,unsigned long*size,const unsigned char*sha1)2014{2015int ret;2016 git_zstream stream;2017char hdr[8192];20182019 ret =unpack_sha1_header(&stream, map, mapsize, hdr,sizeof(hdr));2020if(ret < Z_OK || (*type =parse_sha1_header(hdr, size)) <0)2021return NULL;20222023returnunpack_sha1_rest(&stream, hdr, *size, sha1);2024}20252026unsigned longget_size_from_delta(struct packed_git *p,2027struct pack_window **w_curs,2028 off_t curpos)2029{2030const unsigned char*data;2031unsigned char delta_head[20], *in;2032 git_zstream stream;2033int st;20342035memset(&stream,0,sizeof(stream));2036 stream.next_out = delta_head;2037 stream.avail_out =sizeof(delta_head);20382039git_inflate_init(&stream);2040do{2041 in =use_pack(p, w_curs, curpos, &stream.avail_in);2042 stream.next_in = in;2043 st =git_inflate(&stream, Z_FINISH);2044 curpos += stream.next_in - in;2045}while((st == Z_OK || st == Z_BUF_ERROR) &&2046 stream.total_out <sizeof(delta_head));2047git_inflate_end(&stream);2048if((st != Z_STREAM_END) && stream.total_out !=sizeof(delta_head)) {2049error("delta data unpack-initial failed");2050return0;2051}20522053/* Examine the initial part of the delta to figure out2054 * the result size.2055 */2056 data = delta_head;20572058/* ignore base size */2059get_delta_hdr_size(&data, delta_head+sizeof(delta_head));20602061/* Read the result size */2062returnget_delta_hdr_size(&data, delta_head+sizeof(delta_head));2063}20642065static off_t get_delta_base(struct packed_git *p,2066struct pack_window **w_curs,2067 off_t *curpos,2068enum object_type type,2069 off_t delta_obj_offset)2070{2071unsigned char*base_info =use_pack(p, w_curs, *curpos, NULL);2072 off_t base_offset;20732074/* use_pack() assured us we have [base_info, base_info + 20)2075 * as a range that we can look at without walking off the2076 * end of the mapped window. Its actually the hash size2077 * that is assured. An OFS_DELTA longer than the hash size2078 * is stupid, as then a REF_DELTA would be smaller to store.2079 */2080if(type == OBJ_OFS_DELTA) {2081unsigned used =0;2082unsigned char c = base_info[used++];2083 base_offset = c &127;2084while(c &128) {2085 base_offset +=1;2086if(!base_offset ||MSB(base_offset,7))2087return0;/* overflow */2088 c = base_info[used++];2089 base_offset = (base_offset <<7) + (c &127);2090}2091 base_offset = delta_obj_offset - base_offset;2092if(base_offset <=0|| base_offset >= delta_obj_offset)2093return0;/* out of bound */2094*curpos += used;2095}else if(type == OBJ_REF_DELTA) {2096/* The base entry _must_ be in the same pack */2097 base_offset =find_pack_entry_one(base_info, p);2098*curpos +=20;2099}else2100die("I am totally screwed");2101return base_offset;2102}21032104/*2105 * Like get_delta_base above, but we return the sha1 instead of the pack2106 * offset. This means it is cheaper for REF deltas (we do not have to do2107 * the final object lookup), but more expensive for OFS deltas (we2108 * have to load the revidx to convert the offset back into a sha1).2109 */2110static const unsigned char*get_delta_base_sha1(struct packed_git *p,2111struct pack_window **w_curs,2112 off_t curpos,2113enum object_type type,2114 off_t delta_obj_offset)2115{2116if(type == OBJ_REF_DELTA) {2117unsigned char*base =use_pack(p, w_curs, curpos, NULL);2118return base;2119}else if(type == OBJ_OFS_DELTA) {2120struct revindex_entry *revidx;2121 off_t base_offset =get_delta_base(p, w_curs, &curpos,2122 type, delta_obj_offset);21232124if(!base_offset)2125return NULL;21262127 revidx =find_pack_revindex(p, base_offset);2128if(!revidx)2129return NULL;21302131returnnth_packed_object_sha1(p, revidx->nr);2132}else2133return NULL;2134}21352136intunpack_object_header(struct packed_git *p,2137struct pack_window **w_curs,2138 off_t *curpos,2139unsigned long*sizep)2140{2141unsigned char*base;2142unsigned long left;2143unsigned long used;2144enum object_type type;21452146/* use_pack() assures us we have [base, base + 20) available2147 * as a range that we can look at. (Its actually the hash2148 * size that is assured.) With our object header encoding2149 * the maximum deflated object size is 2^137, which is just2150 * insane, so we know won't exceed what we have been given.2151 */2152 base =use_pack(p, w_curs, *curpos, &left);2153 used =unpack_object_header_buffer(base, left, &type, sizep);2154if(!used) {2155 type = OBJ_BAD;2156}else2157*curpos += used;21582159return type;2160}21612162static intretry_bad_packed_offset(struct packed_git *p, off_t obj_offset)2163{2164int type;2165struct revindex_entry *revidx;2166const unsigned char*sha1;2167 revidx =find_pack_revindex(p, obj_offset);2168if(!revidx)2169return OBJ_BAD;2170 sha1 =nth_packed_object_sha1(p, revidx->nr);2171mark_bad_packed_object(p, sha1);2172 type =sha1_object_info(sha1, NULL);2173if(type <= OBJ_NONE)2174return OBJ_BAD;2175return type;2176}21772178#define POI_STACK_PREALLOC 6421792180static enum object_type packed_to_object_type(struct packed_git *p,2181 off_t obj_offset,2182enum object_type type,2183struct pack_window **w_curs,2184 off_t curpos)2185{2186 off_t small_poi_stack[POI_STACK_PREALLOC];2187 off_t *poi_stack = small_poi_stack;2188int poi_stack_nr =0, poi_stack_alloc = POI_STACK_PREALLOC;21892190while(type == OBJ_OFS_DELTA || type == OBJ_REF_DELTA) {2191 off_t base_offset;2192unsigned long size;2193/* Push the object we're going to leave behind */2194if(poi_stack_nr >= poi_stack_alloc && poi_stack == small_poi_stack) {2195 poi_stack_alloc =alloc_nr(poi_stack_nr);2196ALLOC_ARRAY(poi_stack, poi_stack_alloc);2197memcpy(poi_stack, small_poi_stack,sizeof(off_t)*poi_stack_nr);2198}else{2199ALLOC_GROW(poi_stack, poi_stack_nr+1, poi_stack_alloc);2200}2201 poi_stack[poi_stack_nr++] = obj_offset;2202/* If parsing the base offset fails, just unwind */2203 base_offset =get_delta_base(p, w_curs, &curpos, type, obj_offset);2204if(!base_offset)2205goto unwind;2206 curpos = obj_offset = base_offset;2207 type =unpack_object_header(p, w_curs, &curpos, &size);2208if(type <= OBJ_NONE) {2209/* If getting the base itself fails, we first2210 * retry the base, otherwise unwind */2211 type =retry_bad_packed_offset(p, base_offset);2212if(type > OBJ_NONE)2213goto out;2214goto unwind;2215}2216}22172218switch(type) {2219case OBJ_BAD:2220case OBJ_COMMIT:2221case OBJ_TREE:2222case OBJ_BLOB:2223case OBJ_TAG:2224break;2225default:2226error("unknown object type%iat offset %"PRIuMAX" in%s",2227 type, (uintmax_t)obj_offset, p->pack_name);2228 type = OBJ_BAD;2229}22302231out:2232if(poi_stack != small_poi_stack)2233free(poi_stack);2234return type;22352236unwind:2237while(poi_stack_nr) {2238 obj_offset = poi_stack[--poi_stack_nr];2239 type =retry_bad_packed_offset(p, obj_offset);2240if(type > OBJ_NONE)2241goto out;2242}2243 type = OBJ_BAD;2244goto out;2245}22462247intpacked_object_info(struct packed_git *p, off_t obj_offset,2248struct object_info *oi)2249{2250struct pack_window *w_curs = NULL;2251unsigned long size;2252 off_t curpos = obj_offset;2253enum object_type type;22542255/*2256 * We always get the representation type, but only convert it to2257 * a "real" type later if the caller is interested.2258 */2259 type =unpack_object_header(p, &w_curs, &curpos, &size);22602261if(oi->sizep) {2262if(type == OBJ_OFS_DELTA || type == OBJ_REF_DELTA) {2263 off_t tmp_pos = curpos;2264 off_t base_offset =get_delta_base(p, &w_curs, &tmp_pos,2265 type, obj_offset);2266if(!base_offset) {2267 type = OBJ_BAD;2268goto out;2269}2270*oi->sizep =get_size_from_delta(p, &w_curs, tmp_pos);2271if(*oi->sizep ==0) {2272 type = OBJ_BAD;2273goto out;2274}2275}else{2276*oi->sizep = size;2277}2278}22792280if(oi->disk_sizep) {2281struct revindex_entry *revidx =find_pack_revindex(p, obj_offset);2282*oi->disk_sizep = revidx[1].offset - obj_offset;2283}22842285if(oi->typep) {2286*oi->typep =packed_to_object_type(p, obj_offset, type, &w_curs, curpos);2287if(*oi->typep <0) {2288 type = OBJ_BAD;2289goto out;2290}2291}22922293if(oi->delta_base_sha1) {2294if(type == OBJ_OFS_DELTA || type == OBJ_REF_DELTA) {2295const unsigned char*base;22962297 base =get_delta_base_sha1(p, &w_curs, curpos,2298 type, obj_offset);2299if(!base) {2300 type = OBJ_BAD;2301goto out;2302}23032304hashcpy(oi->delta_base_sha1, base);2305}else2306hashclr(oi->delta_base_sha1);2307}23082309out:2310unuse_pack(&w_curs);2311return type;2312}23132314static void*unpack_compressed_entry(struct packed_git *p,2315struct pack_window **w_curs,2316 off_t curpos,2317unsigned long size)2318{2319int st;2320 git_zstream stream;2321unsigned char*buffer, *in;23222323 buffer =xmallocz_gently(size);2324if(!buffer)2325return NULL;2326memset(&stream,0,sizeof(stream));2327 stream.next_out = buffer;2328 stream.avail_out = size +1;23292330git_inflate_init(&stream);2331do{2332 in =use_pack(p, w_curs, curpos, &stream.avail_in);2333 stream.next_in = in;2334 st =git_inflate(&stream, Z_FINISH);2335if(!stream.avail_out)2336break;/* the payload is larger than it should be */2337 curpos += stream.next_in - in;2338}while(st == Z_OK || st == Z_BUF_ERROR);2339git_inflate_end(&stream);2340if((st != Z_STREAM_END) || stream.total_out != size) {2341free(buffer);2342return NULL;2343}23442345return buffer;2346}23472348static struct hashmap delta_base_cache;2349static size_t delta_base_cached;23502351staticLIST_HEAD(delta_base_cache_lru);23522353struct delta_base_cache_key {2354struct packed_git *p;2355 off_t base_offset;2356};23572358struct delta_base_cache_entry {2359struct hashmap hash;2360struct delta_base_cache_key key;2361struct list_head lru;2362void*data;2363unsigned long size;2364enum object_type type;2365};23662367static unsigned intpack_entry_hash(struct packed_git *p, off_t base_offset)2368{2369unsigned int hash;23702371 hash = (unsigned int)(intptr_t)p + (unsigned int)base_offset;2372 hash += (hash >>8) + (hash >>16);2373return hash;2374}23752376static struct delta_base_cache_entry *2377get_delta_base_cache_entry(struct packed_git *p, off_t base_offset)2378{2379struct hashmap_entry entry;2380struct delta_base_cache_key key;23812382if(!delta_base_cache.cmpfn)2383return NULL;23842385hashmap_entry_init(&entry,pack_entry_hash(p, base_offset));2386 key.p = p;2387 key.base_offset = base_offset;2388returnhashmap_get(&delta_base_cache, &entry, &key);2389}23902391static intdelta_base_cache_key_eq(const struct delta_base_cache_key *a,2392const struct delta_base_cache_key *b)2393{2394return a->p == b->p && a->base_offset == b->base_offset;2395}23962397static intdelta_base_cache_hash_cmp(const void*va,const void*vb,2398const void*vkey)2399{2400const struct delta_base_cache_entry *a = va, *b = vb;2401const struct delta_base_cache_key *key = vkey;2402if(key)2403return!delta_base_cache_key_eq(&a->key, key);2404else2405return!delta_base_cache_key_eq(&a->key, &b->key);2406}24072408static intin_delta_base_cache(struct packed_git *p, off_t base_offset)2409{2410return!!get_delta_base_cache_entry(p, base_offset);2411}24122413/*2414 * Remove the entry from the cache, but do _not_ free the associated2415 * entry data. The caller takes ownership of the "data" buffer, and2416 * should copy out any fields it wants before detaching.2417 */2418static voiddetach_delta_base_cache_entry(struct delta_base_cache_entry *ent)2419{2420hashmap_remove(&delta_base_cache, ent, &ent->key);2421list_del(&ent->lru);2422 delta_base_cached -= ent->size;2423free(ent);2424}24252426static void*cache_or_unpack_entry(struct packed_git *p, off_t base_offset,2427unsigned long*base_size,enum object_type *type)2428{2429struct delta_base_cache_entry *ent;24302431 ent =get_delta_base_cache_entry(p, base_offset);2432if(!ent)2433returnunpack_entry(p, base_offset, type, base_size);24342435*type = ent->type;2436*base_size = ent->size;2437returnxmemdupz(ent->data, ent->size);2438}24392440staticinlinevoidrelease_delta_base_cache(struct delta_base_cache_entry *ent)2441{2442free(ent->data);2443detach_delta_base_cache_entry(ent);2444}24452446voidclear_delta_base_cache(void)2447{2448struct list_head *lru, *tmp;2449list_for_each_safe(lru, tmp, &delta_base_cache_lru) {2450struct delta_base_cache_entry *entry =2451list_entry(lru,struct delta_base_cache_entry, lru);2452release_delta_base_cache(entry);2453}2454}24552456static voidadd_delta_base_cache(struct packed_git *p, off_t base_offset,2457void*base,unsigned long base_size,enum object_type type)2458{2459struct delta_base_cache_entry *ent =xmalloc(sizeof(*ent));2460struct list_head *lru, *tmp;24612462 delta_base_cached += base_size;24632464list_for_each_safe(lru, tmp, &delta_base_cache_lru) {2465struct delta_base_cache_entry *f =2466list_entry(lru,struct delta_base_cache_entry, lru);2467if(delta_base_cached <= delta_base_cache_limit)2468break;2469release_delta_base_cache(f);2470}24712472 ent->key.p = p;2473 ent->key.base_offset = base_offset;2474 ent->type = type;2475 ent->data = base;2476 ent->size = base_size;2477list_add_tail(&ent->lru, &delta_base_cache_lru);24782479if(!delta_base_cache.cmpfn)2480hashmap_init(&delta_base_cache, delta_base_cache_hash_cmp,0);2481hashmap_entry_init(ent,pack_entry_hash(p, base_offset));2482hashmap_add(&delta_base_cache, ent);2483}24842485static void*read_object(const unsigned char*sha1,enum object_type *type,2486unsigned long*size);24872488static voidwrite_pack_access_log(struct packed_git *p, off_t obj_offset)2489{2490static struct trace_key pack_access =TRACE_KEY_INIT(PACK_ACCESS);2491trace_printf_key(&pack_access,"%s%"PRIuMAX"\n",2492 p->pack_name, (uintmax_t)obj_offset);2493}24942495int do_check_packed_object_crc;24962497#define UNPACK_ENTRY_STACK_PREALLOC 642498struct unpack_entry_stack_ent {2499 off_t obj_offset;2500 off_t curpos;2501unsigned long size;2502};25032504void*unpack_entry(struct packed_git *p, off_t obj_offset,2505enum object_type *final_type,unsigned long*final_size)2506{2507struct pack_window *w_curs = NULL;2508 off_t curpos = obj_offset;2509void*data = NULL;2510unsigned long size;2511enum object_type type;2512struct unpack_entry_stack_ent small_delta_stack[UNPACK_ENTRY_STACK_PREALLOC];2513struct unpack_entry_stack_ent *delta_stack = small_delta_stack;2514int delta_stack_nr =0, delta_stack_alloc = UNPACK_ENTRY_STACK_PREALLOC;2515int base_from_cache =0;25162517write_pack_access_log(p, obj_offset);25182519/* PHASE 1: drill down to the innermost base object */2520for(;;) {2521 off_t base_offset;2522int i;2523struct delta_base_cache_entry *ent;25242525 ent =get_delta_base_cache_entry(p, curpos);2526if(ent) {2527 type = ent->type;2528 data = ent->data;2529 size = ent->size;2530detach_delta_base_cache_entry(ent);2531 base_from_cache =1;2532break;2533}25342535if(do_check_packed_object_crc && p->index_version >1) {2536struct revindex_entry *revidx =find_pack_revindex(p, obj_offset);2537 off_t len = revidx[1].offset - obj_offset;2538if(check_pack_crc(p, &w_curs, obj_offset, len, revidx->nr)) {2539const unsigned char*sha1 =2540nth_packed_object_sha1(p, revidx->nr);2541error("bad packed object CRC for%s",2542sha1_to_hex(sha1));2543mark_bad_packed_object(p, sha1);2544unuse_pack(&w_curs);2545return NULL;2546}2547}25482549 type =unpack_object_header(p, &w_curs, &curpos, &size);2550if(type != OBJ_OFS_DELTA && type != OBJ_REF_DELTA)2551break;25522553 base_offset =get_delta_base(p, &w_curs, &curpos, type, obj_offset);2554if(!base_offset) {2555error("failed to validate delta base reference "2556"at offset %"PRIuMAX" from%s",2557(uintmax_t)curpos, p->pack_name);2558/* bail to phase 2, in hopes of recovery */2559 data = NULL;2560break;2561}25622563/* push object, proceed to base */2564if(delta_stack_nr >= delta_stack_alloc2565&& delta_stack == small_delta_stack) {2566 delta_stack_alloc =alloc_nr(delta_stack_nr);2567ALLOC_ARRAY(delta_stack, delta_stack_alloc);2568memcpy(delta_stack, small_delta_stack,2569sizeof(*delta_stack)*delta_stack_nr);2570}else{2571ALLOC_GROW(delta_stack, delta_stack_nr+1, delta_stack_alloc);2572}2573 i = delta_stack_nr++;2574 delta_stack[i].obj_offset = obj_offset;2575 delta_stack[i].curpos = curpos;2576 delta_stack[i].size = size;25772578 curpos = obj_offset = base_offset;2579}25802581/* PHASE 2: handle the base */2582switch(type) {2583case OBJ_OFS_DELTA:2584case OBJ_REF_DELTA:2585if(data)2586die("BUG: unpack_entry: left loop at a valid delta");2587break;2588case OBJ_COMMIT:2589case OBJ_TREE:2590case OBJ_BLOB:2591case OBJ_TAG:2592if(!base_from_cache)2593 data =unpack_compressed_entry(p, &w_curs, curpos, size);2594break;2595default:2596 data = NULL;2597error("unknown object type%iat offset %"PRIuMAX" in%s",2598 type, (uintmax_t)obj_offset, p->pack_name);2599}26002601/* PHASE 3: apply deltas in order */26022603/* invariants:2604 * 'data' holds the base data, or NULL if there was corruption2605 */2606while(delta_stack_nr) {2607void*delta_data;2608void*base = data;2609void*external_base = NULL;2610unsigned long delta_size, base_size = size;2611int i;26122613 data = NULL;26142615if(base)2616add_delta_base_cache(p, obj_offset, base, base_size, type);26172618if(!base) {2619/*2620 * We're probably in deep shit, but let's try to fetch2621 * the required base anyway from another pack or loose.2622 * This is costly but should happen only in the presence2623 * of a corrupted pack, and is better than failing outright.2624 */2625struct revindex_entry *revidx;2626const unsigned char*base_sha1;2627 revidx =find_pack_revindex(p, obj_offset);2628if(revidx) {2629 base_sha1 =nth_packed_object_sha1(p, revidx->nr);2630error("failed to read delta base object%s"2631" at offset %"PRIuMAX" from%s",2632sha1_to_hex(base_sha1), (uintmax_t)obj_offset,2633 p->pack_name);2634mark_bad_packed_object(p, base_sha1);2635 base =read_object(base_sha1, &type, &base_size);2636 external_base = base;2637}2638}26392640 i = --delta_stack_nr;2641 obj_offset = delta_stack[i].obj_offset;2642 curpos = delta_stack[i].curpos;2643 delta_size = delta_stack[i].size;26442645if(!base)2646continue;26472648 delta_data =unpack_compressed_entry(p, &w_curs, curpos, delta_size);26492650if(!delta_data) {2651error("failed to unpack compressed delta "2652"at offset %"PRIuMAX" from%s",2653(uintmax_t)curpos, p->pack_name);2654 data = NULL;2655free(external_base);2656continue;2657}26582659 data =patch_delta(base, base_size,2660 delta_data, delta_size,2661&size);26622663/*2664 * We could not apply the delta; warn the user, but keep going.2665 * Our failure will be noticed either in the next iteration of2666 * the loop, or if this is the final delta, in the caller when2667 * we return NULL. Those code paths will take care of making2668 * a more explicit warning and retrying with another copy of2669 * the object.2670 */2671if(!data)2672error("failed to apply delta");26732674free(delta_data);2675free(external_base);2676}26772678*final_type = type;2679*final_size = size;26802681unuse_pack(&w_curs);26822683if(delta_stack != small_delta_stack)2684free(delta_stack);26852686return data;2687}26882689const unsigned char*nth_packed_object_sha1(struct packed_git *p,2690uint32_t n)2691{2692const unsigned char*index = p->index_data;2693if(!index) {2694if(open_pack_index(p))2695return NULL;2696 index = p->index_data;2697}2698if(n >= p->num_objects)2699return NULL;2700 index +=4*256;2701if(p->index_version ==1) {2702return index +24* n +4;2703}else{2704 index +=8;2705return index +20* n;2706}2707}27082709voidcheck_pack_index_ptr(const struct packed_git *p,const void*vptr)2710{2711const unsigned char*ptr = vptr;2712const unsigned char*start = p->index_data;2713const unsigned char*end = start + p->index_size;2714if(ptr < start)2715die(_("offset before start of pack index for%s(corrupt index?)"),2716 p->pack_name);2717/* No need to check for underflow; .idx files must be at least 8 bytes */2718if(ptr >= end -8)2719die(_("offset beyond end of pack index for%s(truncated index?)"),2720 p->pack_name);2721}27222723off_t nth_packed_object_offset(const struct packed_git *p,uint32_t n)2724{2725const unsigned char*index = p->index_data;2726 index +=4*256;2727if(p->index_version ==1) {2728returnntohl(*((uint32_t*)(index +24* n)));2729}else{2730uint32_t off;2731 index +=8+ p->num_objects * (20+4);2732 off =ntohl(*((uint32_t*)(index +4* n)));2733if(!(off &0x80000000))2734return off;2735 index += p->num_objects *4+ (off &0x7fffffff) *8;2736check_pack_index_ptr(p, index);2737return(((uint64_t)ntohl(*((uint32_t*)(index +0)))) <<32) |2738ntohl(*((uint32_t*)(index +4)));2739}2740}27412742off_t find_pack_entry_one(const unsigned char*sha1,2743struct packed_git *p)2744{2745const uint32_t*level1_ofs = p->index_data;2746const unsigned char*index = p->index_data;2747unsigned hi, lo, stride;2748static int use_lookup = -1;2749static int debug_lookup = -1;27502751if(debug_lookup <0)2752 debug_lookup = !!getenv("GIT_DEBUG_LOOKUP");27532754if(!index) {2755if(open_pack_index(p))2756return0;2757 level1_ofs = p->index_data;2758 index = p->index_data;2759}2760if(p->index_version >1) {2761 level1_ofs +=2;2762 index +=8;2763}2764 index +=4*256;2765 hi =ntohl(level1_ofs[*sha1]);2766 lo = ((*sha1 ==0x0) ?0:ntohl(level1_ofs[*sha1 -1]));2767if(p->index_version >1) {2768 stride =20;2769}else{2770 stride =24;2771 index +=4;2772}27732774if(debug_lookup)2775printf("%02x%02x%02x... lo%uhi%unr %"PRIu32"\n",2776 sha1[0], sha1[1], sha1[2], lo, hi, p->num_objects);27772778if(use_lookup <0)2779 use_lookup = !!getenv("GIT_USE_LOOKUP");2780if(use_lookup) {2781int pos =sha1_entry_pos(index, stride,0,2782 lo, hi, p->num_objects, sha1);2783if(pos <0)2784return0;2785returnnth_packed_object_offset(p, pos);2786}27872788do{2789unsigned mi = (lo + hi) /2;2790int cmp =hashcmp(index + mi * stride, sha1);27912792if(debug_lookup)2793printf("lo%uhi%urg%umi%u\n",2794 lo, hi, hi - lo, mi);2795if(!cmp)2796returnnth_packed_object_offset(p, mi);2797if(cmp >0)2798 hi = mi;2799else2800 lo = mi+1;2801}while(lo < hi);2802return0;2803}28042805intis_pack_valid(struct packed_git *p)2806{2807/* An already open pack is known to be valid. */2808if(p->pack_fd != -1)2809return1;28102811/* If the pack has one window completely covering the2812 * file size, the pack is known to be valid even if2813 * the descriptor is not currently open.2814 */2815if(p->windows) {2816struct pack_window *w = p->windows;28172818if(!w->offset && w->len == p->pack_size)2819return1;2820}28212822/* Force the pack to open to prove its valid. */2823return!open_packed_git(p);2824}28252826static intfill_pack_entry(const unsigned char*sha1,2827struct pack_entry *e,2828struct packed_git *p)2829{2830 off_t offset;28312832if(p->num_bad_objects) {2833unsigned i;2834for(i =0; i < p->num_bad_objects; i++)2835if(!hashcmp(sha1, p->bad_object_sha1 +20* i))2836return0;2837}28382839 offset =find_pack_entry_one(sha1, p);2840if(!offset)2841return0;28422843/*2844 * We are about to tell the caller where they can locate the2845 * requested object. We better make sure the packfile is2846 * still here and can be accessed before supplying that2847 * answer, as it may have been deleted since the index was2848 * loaded!2849 */2850if(!is_pack_valid(p))2851return0;2852 e->offset = offset;2853 e->p = p;2854hashcpy(e->sha1, sha1);2855return1;2856}28572858/*2859 * Iff a pack file contains the object named by sha1, return true and2860 * store its location to e.2861 */2862static intfind_pack_entry(const unsigned char*sha1,struct pack_entry *e)2863{2864struct mru_entry *p;28652866prepare_packed_git();2867if(!packed_git)2868return0;28692870for(p = packed_git_mru->head; p; p = p->next) {2871if(fill_pack_entry(sha1, e, p->item)) {2872mru_mark(packed_git_mru, p);2873return1;2874}2875}2876return0;2877}28782879struct packed_git *find_sha1_pack(const unsigned char*sha1,2880struct packed_git *packs)2881{2882struct packed_git *p;28832884for(p = packs; p; p = p->next) {2885if(find_pack_entry_one(sha1, p))2886return p;2887}2888return NULL;28892890}28912892static intsha1_loose_object_info(const unsigned char*sha1,2893struct object_info *oi,2894int flags)2895{2896int status =0;2897unsigned long mapsize;2898void*map;2899 git_zstream stream;2900char hdr[32];2901struct strbuf hdrbuf = STRBUF_INIT;29022903if(oi->delta_base_sha1)2904hashclr(oi->delta_base_sha1);29052906/*2907 * If we don't care about type or size, then we don't2908 * need to look inside the object at all. Note that we2909 * do not optimize out the stat call, even if the2910 * caller doesn't care about the disk-size, since our2911 * return value implicitly indicates whether the2912 * object even exists.2913 */2914if(!oi->typep && !oi->typename && !oi->sizep) {2915const char*path;2916struct stat st;2917if(stat_sha1_file(sha1, &st, &path) <0)2918return-1;2919if(oi->disk_sizep)2920*oi->disk_sizep = st.st_size;2921return0;2922}29232924 map =map_sha1_file(sha1, &mapsize);2925if(!map)2926return-1;2927if(oi->disk_sizep)2928*oi->disk_sizep = mapsize;2929if((flags & LOOKUP_UNKNOWN_OBJECT)) {2930if(unpack_sha1_header_to_strbuf(&stream, map, mapsize, hdr,sizeof(hdr), &hdrbuf) <0)2931 status =error("unable to unpack%sheader with --allow-unknown-type",2932sha1_to_hex(sha1));2933}else if(unpack_sha1_header(&stream, map, mapsize, hdr,sizeof(hdr)) <0)2934 status =error("unable to unpack%sheader",2935sha1_to_hex(sha1));2936if(status <0)2937;/* Do nothing */2938else if(hdrbuf.len) {2939if((status =parse_sha1_header_extended(hdrbuf.buf, oi, flags)) <0)2940 status =error("unable to parse%sheader with --allow-unknown-type",2941sha1_to_hex(sha1));2942}else if((status =parse_sha1_header_extended(hdr, oi, flags)) <0)2943 status =error("unable to parse%sheader",sha1_to_hex(sha1));2944git_inflate_end(&stream);2945munmap(map, mapsize);2946if(status && oi->typep)2947*oi->typep = status;2948strbuf_release(&hdrbuf);2949return0;2950}29512952intsha1_object_info_extended(const unsigned char*sha1,struct object_info *oi,unsigned flags)2953{2954struct cached_object *co;2955struct pack_entry e;2956int rtype;2957enum object_type real_type;2958const unsigned char*real =lookup_replace_object_extended(sha1, flags);29592960 co =find_cached_object(real);2961if(co) {2962if(oi->typep)2963*(oi->typep) = co->type;2964if(oi->sizep)2965*(oi->sizep) = co->size;2966if(oi->disk_sizep)2967*(oi->disk_sizep) =0;2968if(oi->delta_base_sha1)2969hashclr(oi->delta_base_sha1);2970if(oi->typename)2971strbuf_addstr(oi->typename,typename(co->type));2972 oi->whence = OI_CACHED;2973return0;2974}29752976if(!find_pack_entry(real, &e)) {2977/* Most likely it's a loose object. */2978if(!sha1_loose_object_info(real, oi, flags)) {2979 oi->whence = OI_LOOSE;2980return0;2981}29822983/* Not a loose object; someone else may have just packed it. */2984reprepare_packed_git();2985if(!find_pack_entry(real, &e))2986return-1;2987}29882989/*2990 * packed_object_info() does not follow the delta chain to2991 * find out the real type, unless it is given oi->typep.2992 */2993if(oi->typename && !oi->typep)2994 oi->typep = &real_type;29952996 rtype =packed_object_info(e.p, e.offset, oi);2997if(rtype <0) {2998mark_bad_packed_object(e.p, real);2999if(oi->typep == &real_type)3000 oi->typep = NULL;3001returnsha1_object_info_extended(real, oi,0);3002}else if(in_delta_base_cache(e.p, e.offset)) {3003 oi->whence = OI_DBCACHED;3004}else{3005 oi->whence = OI_PACKED;3006 oi->u.packed.offset = e.offset;3007 oi->u.packed.pack = e.p;3008 oi->u.packed.is_delta = (rtype == OBJ_REF_DELTA ||3009 rtype == OBJ_OFS_DELTA);3010}3011if(oi->typename)3012strbuf_addstr(oi->typename,typename(*oi->typep));3013if(oi->typep == &real_type)3014 oi->typep = NULL;30153016return0;3017}30183019/* returns enum object_type or negative */3020intsha1_object_info(const unsigned char*sha1,unsigned long*sizep)3021{3022enum object_type type;3023struct object_info oi = OBJECT_INFO_INIT;30243025 oi.typep = &type;3026 oi.sizep = sizep;3027if(sha1_object_info_extended(sha1, &oi, LOOKUP_REPLACE_OBJECT) <0)3028return-1;3029return type;3030}30313032static void*read_packed_sha1(const unsigned char*sha1,3033enum object_type *type,unsigned long*size)3034{3035struct pack_entry e;3036void*data;30373038if(!find_pack_entry(sha1, &e))3039return NULL;3040 data =cache_or_unpack_entry(e.p, e.offset, size, type);3041if(!data) {3042/*3043 * We're probably in deep shit, but let's try to fetch3044 * the required object anyway from another pack or loose.3045 * This should happen only in the presence of a corrupted3046 * pack, and is better than failing outright.3047 */3048error("failed to read object%sat offset %"PRIuMAX" from%s",3049sha1_to_hex(sha1), (uintmax_t)e.offset, e.p->pack_name);3050mark_bad_packed_object(e.p, sha1);3051 data =read_object(sha1, type, size);3052}3053return data;3054}30553056intpretend_sha1_file(void*buf,unsigned long len,enum object_type type,3057unsigned char*sha1)3058{3059struct cached_object *co;30603061hash_sha1_file(buf, len,typename(type), sha1);3062if(has_sha1_file(sha1) ||find_cached_object(sha1))3063return0;3064ALLOC_GROW(cached_objects, cached_object_nr +1, cached_object_alloc);3065 co = &cached_objects[cached_object_nr++];3066 co->size = len;3067 co->type = type;3068 co->buf =xmalloc(len);3069memcpy(co->buf, buf, len);3070hashcpy(co->sha1, sha1);3071return0;3072}30733074static void*read_object(const unsigned char*sha1,enum object_type *type,3075unsigned long*size)3076{3077unsigned long mapsize;3078void*map, *buf;3079struct cached_object *co;30803081 co =find_cached_object(sha1);3082if(co) {3083*type = co->type;3084*size = co->size;3085returnxmemdupz(co->buf, co->size);3086}30873088 buf =read_packed_sha1(sha1, type, size);3089if(buf)3090return buf;3091 map =map_sha1_file(sha1, &mapsize);3092if(map) {3093 buf =unpack_sha1_file(map, mapsize, type, size, sha1);3094munmap(map, mapsize);3095return buf;3096}3097reprepare_packed_git();3098returnread_packed_sha1(sha1, type, size);3099}31003101/*3102 * This function dies on corrupt objects; the callers who want to3103 * deal with them should arrange to call read_object() and give error3104 * messages themselves.3105 */3106void*read_sha1_file_extended(const unsigned char*sha1,3107enum object_type *type,3108unsigned long*size,3109unsigned flag)3110{3111void*data;3112const struct packed_git *p;3113const char*path;3114struct stat st;3115const unsigned char*repl =lookup_replace_object_extended(sha1, flag);31163117 errno =0;3118 data =read_object(repl, type, size);3119if(data)3120return data;31213122if(errno && errno != ENOENT)3123die_errno("failed to read object%s",sha1_to_hex(sha1));31243125/* die if we replaced an object with one that does not exist */3126if(repl != sha1)3127die("replacement%snot found for%s",3128sha1_to_hex(repl),sha1_to_hex(sha1));31293130if(!stat_sha1_file(repl, &st, &path))3131die("loose object%s(stored in%s) is corrupt",3132sha1_to_hex(repl), path);31333134if((p =has_packed_and_bad(repl)) != NULL)3135die("packed object%s(stored in%s) is corrupt",3136sha1_to_hex(repl), p->pack_name);31373138return NULL;3139}31403141void*read_object_with_reference(const unsigned char*sha1,3142const char*required_type_name,3143unsigned long*size,3144unsigned char*actual_sha1_return)3145{3146enum object_type type, required_type;3147void*buffer;3148unsigned long isize;3149unsigned char actual_sha1[20];31503151 required_type =type_from_string(required_type_name);3152hashcpy(actual_sha1, sha1);3153while(1) {3154int ref_length = -1;3155const char*ref_type = NULL;31563157 buffer =read_sha1_file(actual_sha1, &type, &isize);3158if(!buffer)3159return NULL;3160if(type == required_type) {3161*size = isize;3162if(actual_sha1_return)3163hashcpy(actual_sha1_return, actual_sha1);3164return buffer;3165}3166/* Handle references */3167else if(type == OBJ_COMMIT)3168 ref_type ="tree ";3169else if(type == OBJ_TAG)3170 ref_type ="object ";3171else{3172free(buffer);3173return NULL;3174}3175 ref_length =strlen(ref_type);31763177if(ref_length +40> isize ||3178memcmp(buffer, ref_type, ref_length) ||3179get_sha1_hex((char*) buffer + ref_length, actual_sha1)) {3180free(buffer);3181return NULL;3182}3183free(buffer);3184/* Now we have the ID of the referred-to object in3185 * actual_sha1. Check again. */3186}3187}31883189static voidwrite_sha1_file_prepare(const void*buf,unsigned long len,3190const char*type,unsigned char*sha1,3191char*hdr,int*hdrlen)3192{3193 git_SHA_CTX c;31943195/* Generate the header */3196*hdrlen =xsnprintf(hdr, *hdrlen,"%s %lu", type, len)+1;31973198/* Sha1.. */3199git_SHA1_Init(&c);3200git_SHA1_Update(&c, hdr, *hdrlen);3201git_SHA1_Update(&c, buf, len);3202git_SHA1_Final(sha1, &c);3203}32043205/*3206 * Move the just written object into its final resting place.3207 */3208intfinalize_object_file(const char*tmpfile,const char*filename)3209{3210int ret =0;32113212if(object_creation_mode == OBJECT_CREATION_USES_RENAMES)3213goto try_rename;3214else if(link(tmpfile, filename))3215 ret = errno;32163217/*3218 * Coda hack - coda doesn't like cross-directory links,3219 * so we fall back to a rename, which will mean that it3220 * won't be able to check collisions, but that's not a3221 * big deal.3222 *3223 * The same holds for FAT formatted media.3224 *3225 * When this succeeds, we just return. We have nothing3226 * left to unlink.3227 */3228if(ret && ret != EEXIST) {3229 try_rename:3230if(!rename(tmpfile, filename))3231goto out;3232 ret = errno;3233}3234unlink_or_warn(tmpfile);3235if(ret) {3236if(ret != EEXIST) {3237returnerror_errno("unable to write sha1 filename%s", filename);3238}3239/* FIXME!!! Collision check here ? */3240}32413242out:3243if(adjust_shared_perm(filename))3244returnerror("unable to set permission to '%s'", filename);3245return0;3246}32473248static intwrite_buffer(int fd,const void*buf,size_t len)3249{3250if(write_in_full(fd, buf, len) <0)3251returnerror_errno("file write error");3252return0;3253}32543255inthash_sha1_file(const void*buf,unsigned long len,const char*type,3256unsigned char*sha1)3257{3258char hdr[32];3259int hdrlen =sizeof(hdr);3260write_sha1_file_prepare(buf, len, type, sha1, hdr, &hdrlen);3261return0;3262}32633264/* Finalize a file on disk, and close it. */3265static voidclose_sha1_file(int fd)3266{3267if(fsync_object_files)3268fsync_or_die(fd,"sha1 file");3269if(close(fd) !=0)3270die_errno("error when closing sha1 file");3271}32723273/* Size of directory component, including the ending '/' */3274staticinlineintdirectory_size(const char*filename)3275{3276const char*s =strrchr(filename,'/');3277if(!s)3278return0;3279return s - filename +1;3280}32813282/*3283 * This creates a temporary file in the same directory as the final3284 * 'filename'3285 *3286 * We want to avoid cross-directory filename renames, because those3287 * can have problems on various filesystems (FAT, NFS, Coda).3288 */3289static intcreate_tmpfile(struct strbuf *tmp,const char*filename)3290{3291int fd, dirlen =directory_size(filename);32923293strbuf_reset(tmp);3294strbuf_add(tmp, filename, dirlen);3295strbuf_addstr(tmp,"tmp_obj_XXXXXX");3296 fd =git_mkstemp_mode(tmp->buf,0444);3297if(fd <0&& dirlen && errno == ENOENT) {3298/*3299 * Make sure the directory exists; note that the contents3300 * of the buffer are undefined after mkstemp returns an3301 * error, so we have to rewrite the whole buffer from3302 * scratch.3303 */3304strbuf_reset(tmp);3305strbuf_add(tmp, filename, dirlen -1);3306if(mkdir(tmp->buf,0777) && errno != EEXIST)3307return-1;3308if(adjust_shared_perm(tmp->buf))3309return-1;33103311/* Try again */3312strbuf_addstr(tmp,"/tmp_obj_XXXXXX");3313 fd =git_mkstemp_mode(tmp->buf,0444);3314}3315return fd;3316}33173318static intwrite_loose_object(const unsigned char*sha1,char*hdr,int hdrlen,3319const void*buf,unsigned long len,time_t mtime)3320{3321int fd, ret;3322unsigned char compressed[4096];3323 git_zstream stream;3324 git_SHA_CTX c;3325unsigned char parano_sha1[20];3326static struct strbuf tmp_file = STRBUF_INIT;3327const char*filename =sha1_file_name(sha1);33283329 fd =create_tmpfile(&tmp_file, filename);3330if(fd <0) {3331if(errno == EACCES)3332returnerror("insufficient permission for adding an object to repository database%s",get_object_directory());3333else3334returnerror_errno("unable to create temporary file");3335}33363337/* Set it up */3338git_deflate_init(&stream, zlib_compression_level);3339 stream.next_out = compressed;3340 stream.avail_out =sizeof(compressed);3341git_SHA1_Init(&c);33423343/* First header.. */3344 stream.next_in = (unsigned char*)hdr;3345 stream.avail_in = hdrlen;3346while(git_deflate(&stream,0) == Z_OK)3347;/* nothing */3348git_SHA1_Update(&c, hdr, hdrlen);33493350/* Then the data itself.. */3351 stream.next_in = (void*)buf;3352 stream.avail_in = len;3353do{3354unsigned char*in0 = stream.next_in;3355 ret =git_deflate(&stream, Z_FINISH);3356git_SHA1_Update(&c, in0, stream.next_in - in0);3357if(write_buffer(fd, compressed, stream.next_out - compressed) <0)3358die("unable to write sha1 file");3359 stream.next_out = compressed;3360 stream.avail_out =sizeof(compressed);3361}while(ret == Z_OK);33623363if(ret != Z_STREAM_END)3364die("unable to deflate new object%s(%d)",sha1_to_hex(sha1), ret);3365 ret =git_deflate_end_gently(&stream);3366if(ret != Z_OK)3367die("deflateEnd on object%sfailed (%d)",sha1_to_hex(sha1), ret);3368git_SHA1_Final(parano_sha1, &c);3369if(hashcmp(sha1, parano_sha1) !=0)3370die("confused by unstable object source data for%s",sha1_to_hex(sha1));33713372close_sha1_file(fd);33733374if(mtime) {3375struct utimbuf utb;3376 utb.actime = mtime;3377 utb.modtime = mtime;3378if(utime(tmp_file.buf, &utb) <0)3379warning_errno("failed utime() on%s", tmp_file.buf);3380}33813382returnfinalize_object_file(tmp_file.buf, filename);3383}33843385static intfreshen_loose_object(const unsigned char*sha1)3386{3387returncheck_and_freshen(sha1,1);3388}33893390static intfreshen_packed_object(const unsigned char*sha1)3391{3392struct pack_entry e;3393if(!find_pack_entry(sha1, &e))3394return0;3395if(e.p->freshened)3396return1;3397if(!freshen_file(e.p->pack_name))3398return0;3399 e.p->freshened =1;3400return1;3401}34023403intwrite_sha1_file(const void*buf,unsigned long len,const char*type,unsigned char*sha1)3404{3405char hdr[32];3406int hdrlen =sizeof(hdr);34073408/* Normally if we have it in the pack then we do not bother writing3409 * it out into .git/objects/??/?{38} file.3410 */3411write_sha1_file_prepare(buf, len, type, sha1, hdr, &hdrlen);3412if(freshen_packed_object(sha1) ||freshen_loose_object(sha1))3413return0;3414returnwrite_loose_object(sha1, hdr, hdrlen, buf, len,0);3415}34163417inthash_sha1_file_literally(const void*buf,unsigned long len,const char*type,3418unsigned char*sha1,unsigned flags)3419{3420char*header;3421int hdrlen, status =0;34223423/* type string, SP, %lu of the length plus NUL must fit this */3424 hdrlen =strlen(type) +32;3425 header =xmalloc(hdrlen);3426write_sha1_file_prepare(buf, len, type, sha1, header, &hdrlen);34273428if(!(flags & HASH_WRITE_OBJECT))3429goto cleanup;3430if(freshen_packed_object(sha1) ||freshen_loose_object(sha1))3431goto cleanup;3432 status =write_loose_object(sha1, header, hdrlen, buf, len,0);34333434cleanup:3435free(header);3436return status;3437}34383439intforce_object_loose(const unsigned char*sha1,time_t mtime)3440{3441void*buf;3442unsigned long len;3443enum object_type type;3444char hdr[32];3445int hdrlen;3446int ret;34473448if(has_loose_object(sha1))3449return0;3450 buf =read_packed_sha1(sha1, &type, &len);3451if(!buf)3452returnerror("cannot read sha1_file for%s",sha1_to_hex(sha1));3453 hdrlen =xsnprintf(hdr,sizeof(hdr),"%s %lu",typename(type), len) +1;3454 ret =write_loose_object(sha1, hdr, hdrlen, buf, len, mtime);3455free(buf);34563457return ret;3458}34593460inthas_pack_index(const unsigned char*sha1)3461{3462struct stat st;3463if(stat(sha1_pack_index_name(sha1), &st))3464return0;3465return1;3466}34673468inthas_sha1_pack(const unsigned char*sha1)3469{3470struct pack_entry e;3471returnfind_pack_entry(sha1, &e);3472}34733474inthas_sha1_file_with_flags(const unsigned char*sha1,int flags)3475{3476struct pack_entry e;34773478if(find_pack_entry(sha1, &e))3479return1;3480if(has_loose_object(sha1))3481return1;3482if(flags & HAS_SHA1_QUICK)3483return0;3484reprepare_packed_git();3485returnfind_pack_entry(sha1, &e);3486}34873488inthas_object_file(const struct object_id *oid)3489{3490returnhas_sha1_file(oid->hash);3491}34923493inthas_object_file_with_flags(const struct object_id *oid,int flags)3494{3495returnhas_sha1_file_with_flags(oid->hash, flags);3496}34973498static voidcheck_tree(const void*buf,size_t size)3499{3500struct tree_desc desc;3501struct name_entry entry;35023503init_tree_desc(&desc, buf, size);3504while(tree_entry(&desc, &entry))3505/* do nothing3506 * tree_entry() will die() on malformed entries */3507;3508}35093510static voidcheck_commit(const void*buf,size_t size)3511{3512struct commit c;3513memset(&c,0,sizeof(c));3514if(parse_commit_buffer(&c, buf, size))3515die("corrupt commit");3516}35173518static voidcheck_tag(const void*buf,size_t size)3519{3520struct tag t;3521memset(&t,0,sizeof(t));3522if(parse_tag_buffer(&t, buf, size))3523die("corrupt tag");3524}35253526static intindex_mem(unsigned char*sha1,void*buf,size_t size,3527enum object_type type,3528const char*path,unsigned flags)3529{3530int ret, re_allocated =0;3531int write_object = flags & HASH_WRITE_OBJECT;35323533if(!type)3534 type = OBJ_BLOB;35353536/*3537 * Convert blobs to git internal format3538 */3539if((type == OBJ_BLOB) && path) {3540struct strbuf nbuf = STRBUF_INIT;3541if(convert_to_git(path, buf, size, &nbuf,3542 write_object ? safe_crlf : SAFE_CRLF_FALSE)) {3543 buf =strbuf_detach(&nbuf, &size);3544 re_allocated =1;3545}3546}3547if(flags & HASH_FORMAT_CHECK) {3548if(type == OBJ_TREE)3549check_tree(buf, size);3550if(type == OBJ_COMMIT)3551check_commit(buf, size);3552if(type == OBJ_TAG)3553check_tag(buf, size);3554}35553556if(write_object)3557 ret =write_sha1_file(buf, size,typename(type), sha1);3558else3559 ret =hash_sha1_file(buf, size,typename(type), sha1);3560if(re_allocated)3561free(buf);3562return ret;3563}35643565static intindex_stream_convert_blob(unsigned char*sha1,int fd,3566const char*path,unsigned flags)3567{3568int ret;3569const int write_object = flags & HASH_WRITE_OBJECT;3570struct strbuf sbuf = STRBUF_INIT;35713572assert(path);3573assert(would_convert_to_git_filter_fd(path));35743575convert_to_git_filter_fd(path, fd, &sbuf,3576 write_object ? safe_crlf : SAFE_CRLF_FALSE);35773578if(write_object)3579 ret =write_sha1_file(sbuf.buf, sbuf.len,typename(OBJ_BLOB),3580 sha1);3581else3582 ret =hash_sha1_file(sbuf.buf, sbuf.len,typename(OBJ_BLOB),3583 sha1);3584strbuf_release(&sbuf);3585return ret;3586}35873588static intindex_pipe(unsigned char*sha1,int fd,enum object_type type,3589const char*path,unsigned flags)3590{3591struct strbuf sbuf = STRBUF_INIT;3592int ret;35933594if(strbuf_read(&sbuf, fd,4096) >=0)3595 ret =index_mem(sha1, sbuf.buf, sbuf.len, type, path, flags);3596else3597 ret = -1;3598strbuf_release(&sbuf);3599return ret;3600}36013602#define SMALL_FILE_SIZE (32*1024)36033604static intindex_core(unsigned char*sha1,int fd,size_t size,3605enum object_type type,const char*path,3606unsigned flags)3607{3608int ret;36093610if(!size) {3611 ret =index_mem(sha1,"", size, type, path, flags);3612}else if(size <= SMALL_FILE_SIZE) {3613char*buf =xmalloc(size);3614if(size ==read_in_full(fd, buf, size))3615 ret =index_mem(sha1, buf, size, type, path, flags);3616else3617 ret =error_errno("short read");3618free(buf);3619}else{3620void*buf =xmmap(NULL, size, PROT_READ, MAP_PRIVATE, fd,0);3621 ret =index_mem(sha1, buf, size, type, path, flags);3622munmap(buf, size);3623}3624return ret;3625}36263627/*3628 * This creates one packfile per large blob unless bulk-checkin3629 * machinery is "plugged".3630 *3631 * This also bypasses the usual "convert-to-git" dance, and that is on3632 * purpose. We could write a streaming version of the converting3633 * functions and insert that before feeding the data to fast-import3634 * (or equivalent in-core API described above). However, that is3635 * somewhat complicated, as we do not know the size of the filter3636 * result, which we need to know beforehand when writing a git object.3637 * Since the primary motivation for trying to stream from the working3638 * tree file and to avoid mmaping it in core is to deal with large3639 * binary blobs, they generally do not want to get any conversion, and3640 * callers should avoid this code path when filters are requested.3641 */3642static intindex_stream(unsigned char*sha1,int fd,size_t size,3643enum object_type type,const char*path,3644unsigned flags)3645{3646returnindex_bulk_checkin(sha1, fd, size, type, path, flags);3647}36483649intindex_fd(unsigned char*sha1,int fd,struct stat *st,3650enum object_type type,const char*path,unsigned flags)3651{3652int ret;36533654/*3655 * Call xsize_t() only when needed to avoid potentially unnecessary3656 * die() for large files.3657 */3658if(type == OBJ_BLOB && path &&would_convert_to_git_filter_fd(path))3659 ret =index_stream_convert_blob(sha1, fd, path, flags);3660else if(!S_ISREG(st->st_mode))3661 ret =index_pipe(sha1, fd, type, path, flags);3662else if(st->st_size <= big_file_threshold || type != OBJ_BLOB ||3663(path &&would_convert_to_git(path)))3664 ret =index_core(sha1, fd,xsize_t(st->st_size), type, path,3665 flags);3666else3667 ret =index_stream(sha1, fd,xsize_t(st->st_size), type, path,3668 flags);3669close(fd);3670return ret;3671}36723673intindex_path(unsigned char*sha1,const char*path,struct stat *st,unsigned flags)3674{3675int fd;3676struct strbuf sb = STRBUF_INIT;36773678switch(st->st_mode & S_IFMT) {3679case S_IFREG:3680 fd =open(path, O_RDONLY);3681if(fd <0)3682returnerror_errno("open(\"%s\")", path);3683if(index_fd(sha1, fd, st, OBJ_BLOB, path, flags) <0)3684returnerror("%s: failed to insert into database",3685 path);3686break;3687case S_IFLNK:3688if(strbuf_readlink(&sb, path, st->st_size))3689returnerror_errno("readlink(\"%s\")", path);3690if(!(flags & HASH_WRITE_OBJECT))3691hash_sha1_file(sb.buf, sb.len, blob_type, sha1);3692else if(write_sha1_file(sb.buf, sb.len, blob_type, sha1))3693returnerror("%s: failed to insert into database",3694 path);3695strbuf_release(&sb);3696break;3697case S_IFDIR:3698returnresolve_gitlink_ref(path,"HEAD", sha1);3699default:3700returnerror("%s: unsupported file type", path);3701}3702return0;3703}37043705intread_pack_header(int fd,struct pack_header *header)3706{3707if(read_in_full(fd, header,sizeof(*header)) <sizeof(*header))3708/* "eof before pack header was fully read" */3709return PH_ERROR_EOF;37103711if(header->hdr_signature !=htonl(PACK_SIGNATURE))3712/* "protocol error (pack signature mismatch detected)" */3713return PH_ERROR_PACK_SIGNATURE;3714if(!pack_version_ok(header->hdr_version))3715/* "protocol error (pack version unsupported)" */3716return PH_ERROR_PROTOCOL;3717return0;3718}37193720voidassert_sha1_type(const unsigned char*sha1,enum object_type expect)3721{3722enum object_type type =sha1_object_info(sha1, NULL);3723if(type <0)3724die("%sis not a valid object",sha1_to_hex(sha1));3725if(type != expect)3726die("%sis not a valid '%s' object",sha1_to_hex(sha1),3727typename(expect));3728}37293730static intfor_each_file_in_obj_subdir(int subdir_nr,3731struct strbuf *path,3732 each_loose_object_fn obj_cb,3733 each_loose_cruft_fn cruft_cb,3734 each_loose_subdir_fn subdir_cb,3735void*data)3736{3737size_t baselen = path->len;3738DIR*dir =opendir(path->buf);3739struct dirent *de;3740int r =0;37413742if(!dir) {3743if(errno == ENOENT)3744return0;3745returnerror_errno("unable to open%s", path->buf);3746}37473748while((de =readdir(dir))) {3749if(is_dot_or_dotdot(de->d_name))3750continue;37513752strbuf_setlen(path, baselen);3753strbuf_addf(path,"/%s", de->d_name);37543755if(strlen(de->d_name) ==38) {3756char hex[41];3757unsigned char sha1[20];37583759snprintf(hex,sizeof(hex),"%02x%s",3760 subdir_nr, de->d_name);3761if(!get_sha1_hex(hex, sha1)) {3762if(obj_cb) {3763 r =obj_cb(sha1, path->buf, data);3764if(r)3765break;3766}3767continue;3768}3769}37703771if(cruft_cb) {3772 r =cruft_cb(de->d_name, path->buf, data);3773if(r)3774break;3775}3776}3777closedir(dir);37783779strbuf_setlen(path, baselen);3780if(!r && subdir_cb)3781 r =subdir_cb(subdir_nr, path->buf, data);37823783return r;3784}37853786intfor_each_loose_file_in_objdir_buf(struct strbuf *path,3787 each_loose_object_fn obj_cb,3788 each_loose_cruft_fn cruft_cb,3789 each_loose_subdir_fn subdir_cb,3790void*data)3791{3792size_t baselen = path->len;3793int r =0;3794int i;37953796for(i =0; i <256; i++) {3797strbuf_addf(path,"/%02x", i);3798 r =for_each_file_in_obj_subdir(i, path, obj_cb, cruft_cb,3799 subdir_cb, data);3800strbuf_setlen(path, baselen);3801if(r)3802break;3803}38043805return r;3806}38073808intfor_each_loose_file_in_objdir(const char*path,3809 each_loose_object_fn obj_cb,3810 each_loose_cruft_fn cruft_cb,3811 each_loose_subdir_fn subdir_cb,3812void*data)3813{3814struct strbuf buf = STRBUF_INIT;3815int r;38163817strbuf_addstr(&buf, path);3818 r =for_each_loose_file_in_objdir_buf(&buf, obj_cb, cruft_cb,3819 subdir_cb, data);3820strbuf_release(&buf);38213822return r;3823}38243825struct loose_alt_odb_data {3826 each_loose_object_fn *cb;3827void*data;3828};38293830static intloose_from_alt_odb(struct alternate_object_database *alt,3831void*vdata)3832{3833struct loose_alt_odb_data *data = vdata;3834struct strbuf buf = STRBUF_INIT;3835int r;38363837strbuf_addstr(&buf, alt->path);3838 r =for_each_loose_file_in_objdir_buf(&buf,3839 data->cb, NULL, NULL,3840 data->data);3841strbuf_release(&buf);3842return r;3843}38443845intfor_each_loose_object(each_loose_object_fn cb,void*data,unsigned flags)3846{3847struct loose_alt_odb_data alt;3848int r;38493850 r =for_each_loose_file_in_objdir(get_object_directory(),3851 cb, NULL, NULL, data);3852if(r)3853return r;38543855if(flags & FOR_EACH_OBJECT_LOCAL_ONLY)3856return0;38573858 alt.cb = cb;3859 alt.data = data;3860returnforeach_alt_odb(loose_from_alt_odb, &alt);3861}38623863static intfor_each_object_in_pack(struct packed_git *p, each_packed_object_fn cb,void*data)3864{3865uint32_t i;3866int r =0;38673868for(i =0; i < p->num_objects; i++) {3869const unsigned char*sha1 =nth_packed_object_sha1(p, i);38703871if(!sha1)3872returnerror("unable to get sha1 of object%uin%s",3873 i, p->pack_name);38743875 r =cb(sha1, p, i, data);3876if(r)3877break;3878}3879return r;3880}38813882intfor_each_packed_object(each_packed_object_fn cb,void*data,unsigned flags)3883{3884struct packed_git *p;3885int r =0;3886int pack_errors =0;38873888prepare_packed_git();3889for(p = packed_git; p; p = p->next) {3890if((flags & FOR_EACH_OBJECT_LOCAL_ONLY) && !p->pack_local)3891continue;3892if(open_pack_index(p)) {3893 pack_errors =1;3894continue;3895}3896 r =for_each_object_in_pack(p, cb, data);3897if(r)3898break;3899}3900return r ? r : pack_errors;3901}39023903static intcheck_stream_sha1(git_zstream *stream,3904const char*hdr,3905unsigned long size,3906const char*path,3907const unsigned char*expected_sha1)3908{3909 git_SHA_CTX c;3910unsigned char real_sha1[GIT_SHA1_RAWSZ];3911unsigned char buf[4096];3912unsigned long total_read;3913int status = Z_OK;39143915git_SHA1_Init(&c);3916git_SHA1_Update(&c, hdr, stream->total_out);39173918/*3919 * We already read some bytes into hdr, but the ones up to the NUL3920 * do not count against the object's content size.3921 */3922 total_read = stream->total_out -strlen(hdr) -1;39233924/*3925 * This size comparison must be "<=" to read the final zlib packets;3926 * see the comment in unpack_sha1_rest for details.3927 */3928while(total_read <= size &&3929(status == Z_OK || status == Z_BUF_ERROR)) {3930 stream->next_out = buf;3931 stream->avail_out =sizeof(buf);3932if(size - total_read < stream->avail_out)3933 stream->avail_out = size - total_read;3934 status =git_inflate(stream, Z_FINISH);3935git_SHA1_Update(&c, buf, stream->next_out - buf);3936 total_read += stream->next_out - buf;3937}3938git_inflate_end(stream);39393940if(status != Z_STREAM_END) {3941error("corrupt loose object '%s'",sha1_to_hex(expected_sha1));3942return-1;3943}3944if(stream->avail_in) {3945error("garbage at end of loose object '%s'",3946sha1_to_hex(expected_sha1));3947return-1;3948}39493950git_SHA1_Final(real_sha1, &c);3951if(hashcmp(expected_sha1, real_sha1)) {3952error("sha1 mismatch for%s(expected%s)", path,3953sha1_to_hex(expected_sha1));3954return-1;3955}39563957return0;3958}39593960intread_loose_object(const char*path,3961const unsigned char*expected_sha1,3962enum object_type *type,3963unsigned long*size,3964void**contents)3965{3966int ret = -1;3967int fd = -1;3968void*map = NULL;3969unsigned long mapsize;3970 git_zstream stream;3971char hdr[32];39723973*contents = NULL;39743975 map =map_sha1_file_1(path, NULL, &mapsize);3976if(!map) {3977error_errno("unable to mmap%s", path);3978goto out;3979}39803981if(unpack_sha1_header(&stream, map, mapsize, hdr,sizeof(hdr)) <0) {3982error("unable to unpack header of%s", path);3983goto out;3984}39853986*type =parse_sha1_header(hdr, size);3987if(*type <0) {3988error("unable to parse header of%s", path);3989git_inflate_end(&stream);3990goto out;3991}39923993if(*type == OBJ_BLOB) {3994if(check_stream_sha1(&stream, hdr, *size, path, expected_sha1) <0)3995goto out;3996}else{3997*contents =unpack_sha1_rest(&stream, hdr, *size, expected_sha1);3998if(!*contents) {3999error("unable to unpack contents of%s", path);4000git_inflate_end(&stream);4001goto out;4002}4003if(check_sha1_signature(expected_sha1, *contents,4004*size,typename(*type))) {4005error("sha1 mismatch for%s(expected%s)", path,4006sha1_to_hex(expected_sha1));4007free(*contents);4008goto out;4009}4010}40114012 ret =0;/* everything checks out */40134014out:4015if(map)4016munmap(map, mapsize);4017if(fd >=0)4018close(fd);4019return ret;4020}