1/* 2 * GIT - The information manager from hell 3 * 4 * Copyright (C) Linus Torvalds, 2005 5 * 6 * This handles basic git sha1 object files - packing, unpacking, 7 * creation etc. 8 */ 9#include"cache.h" 10#include"string-list.h" 11#include"lockfile.h" 12#include"delta.h" 13#include"pack.h" 14#include"blob.h" 15#include"commit.h" 16#include"run-command.h" 17#include"tag.h" 18#include"tree.h" 19#include"tree-walk.h" 20#include"refs.h" 21#include"pack-revindex.h" 22#include"sha1-lookup.h" 23#include"bulk-checkin.h" 24#include"streaming.h" 25#include"dir.h" 26#include"mru.h" 27#include"list.h" 28#include"mergesort.h" 29#include"quote.h" 30 31#define SZ_FMT PRIuMAX 32staticinlineuintmax_tsz_fmt(size_t s) {return s; } 33 34const unsigned char null_sha1[20]; 35const struct object_id null_oid; 36const struct object_id empty_tree_oid = { 37 EMPTY_TREE_SHA1_BIN_LITERAL 38}; 39const struct object_id empty_blob_oid = { 40 EMPTY_BLOB_SHA1_BIN_LITERAL 41}; 42 43/* 44 * This is meant to hold a *small* number of objects that you would 45 * want read_sha1_file() to be able to return, but yet you do not want 46 * to write them into the object store (e.g. a browse-only 47 * application). 48 */ 49static struct cached_object { 50unsigned char sha1[20]; 51enum object_type type; 52void*buf; 53unsigned long size; 54} *cached_objects; 55static int cached_object_nr, cached_object_alloc; 56 57static struct cached_object empty_tree = { 58 EMPTY_TREE_SHA1_BIN_LITERAL, 59 OBJ_TREE, 60"", 610 62}; 63 64static struct cached_object *find_cached_object(const unsigned char*sha1) 65{ 66int i; 67struct cached_object *co = cached_objects; 68 69for(i =0; i < cached_object_nr; i++, co++) { 70if(!hashcmp(co->sha1, sha1)) 71return co; 72} 73if(!hashcmp(sha1, empty_tree.sha1)) 74return&empty_tree; 75return NULL; 76} 77 78intmkdir_in_gitdir(const char*path) 79{ 80if(mkdir(path,0777)) { 81int saved_errno = errno; 82struct stat st; 83struct strbuf sb = STRBUF_INIT; 84 85if(errno != EEXIST) 86return-1; 87/* 88 * Are we looking at a path in a symlinked worktree 89 * whose original repository does not yet have it? 90 * e.g. .git/rr-cache pointing at its original 91 * repository in which the user hasn't performed any 92 * conflict resolution yet? 93 */ 94if(lstat(path, &st) || !S_ISLNK(st.st_mode) || 95strbuf_readlink(&sb, path, st.st_size) || 96!is_absolute_path(sb.buf) || 97mkdir(sb.buf,0777)) { 98strbuf_release(&sb); 99 errno = saved_errno; 100return-1; 101} 102strbuf_release(&sb); 103} 104returnadjust_shared_perm(path); 105} 106 107enum scld_error safe_create_leading_directories(char*path) 108{ 109char*next_component = path +offset_1st_component(path); 110enum scld_error ret = SCLD_OK; 111 112while(ret == SCLD_OK && next_component) { 113struct stat st; 114char*slash = next_component, slash_character; 115 116while(*slash && !is_dir_sep(*slash)) 117 slash++; 118 119if(!*slash) 120break; 121 122 next_component = slash +1; 123while(is_dir_sep(*next_component)) 124 next_component++; 125if(!*next_component) 126break; 127 128 slash_character = *slash; 129*slash ='\0'; 130if(!stat(path, &st)) { 131/* path exists */ 132if(!S_ISDIR(st.st_mode)) { 133 errno = ENOTDIR; 134 ret = SCLD_EXISTS; 135} 136}else if(mkdir(path,0777)) { 137if(errno == EEXIST && 138!stat(path, &st) &&S_ISDIR(st.st_mode)) 139;/* somebody created it since we checked */ 140else if(errno == ENOENT) 141/* 142 * Either mkdir() failed because 143 * somebody just pruned the containing 144 * directory, or stat() failed because 145 * the file that was in our way was 146 * just removed. Either way, inform 147 * the caller that it might be worth 148 * trying again: 149 */ 150 ret = SCLD_VANISHED; 151else 152 ret = SCLD_FAILED; 153}else if(adjust_shared_perm(path)) { 154 ret = SCLD_PERMS; 155} 156*slash = slash_character; 157} 158return ret; 159} 160 161enum scld_error safe_create_leading_directories_const(const char*path) 162{ 163int save_errno; 164/* path points to cache entries, so xstrdup before messing with it */ 165char*buf =xstrdup(path); 166enum scld_error result =safe_create_leading_directories(buf); 167 168 save_errno = errno; 169free(buf); 170 errno = save_errno; 171return result; 172} 173 174intraceproof_create_file(const char*path, create_file_fn fn,void*cb) 175{ 176/* 177 * The number of times we will try to remove empty directories 178 * in the way of path. This is only 1 because if another 179 * process is racily creating directories that conflict with 180 * us, we don't want to fight against them. 181 */ 182int remove_directories_remaining =1; 183 184/* 185 * The number of times that we will try to create the 186 * directories containing path. We are willing to attempt this 187 * more than once, because another process could be trying to 188 * clean up empty directories at the same time as we are 189 * trying to create them. 190 */ 191int create_directories_remaining =3; 192 193/* A scratch copy of path, filled lazily if we need it: */ 194struct strbuf path_copy = STRBUF_INIT; 195 196int ret, save_errno; 197 198/* Sanity check: */ 199assert(*path); 200 201retry_fn: 202 ret =fn(path, cb); 203 save_errno = errno; 204if(!ret) 205goto out; 206 207if(errno == EISDIR && remove_directories_remaining-- >0) { 208/* 209 * A directory is in the way. Maybe it is empty; try 210 * to remove it: 211 */ 212if(!path_copy.len) 213strbuf_addstr(&path_copy, path); 214 215if(!remove_dir_recursively(&path_copy, REMOVE_DIR_EMPTY_ONLY)) 216goto retry_fn; 217}else if(errno == ENOENT && create_directories_remaining-- >0) { 218/* 219 * Maybe the containing directory didn't exist, or 220 * maybe it was just deleted by a process that is 221 * racing with us to clean up empty directories. Try 222 * to create it: 223 */ 224enum scld_error scld_result; 225 226if(!path_copy.len) 227strbuf_addstr(&path_copy, path); 228 229do{ 230 scld_result =safe_create_leading_directories(path_copy.buf); 231if(scld_result == SCLD_OK) 232goto retry_fn; 233}while(scld_result == SCLD_VANISHED && create_directories_remaining-- >0); 234} 235 236out: 237strbuf_release(&path_copy); 238 errno = save_errno; 239return ret; 240} 241 242static voidfill_sha1_path(struct strbuf *buf,const unsigned char*sha1) 243{ 244int i; 245for(i =0; i <20; i++) { 246static char hex[] ="0123456789abcdef"; 247unsigned int val = sha1[i]; 248strbuf_addch(buf, hex[val >>4]); 249strbuf_addch(buf, hex[val &0xf]); 250if(!i) 251strbuf_addch(buf,'/'); 252} 253} 254 255const char*sha1_file_name(const unsigned char*sha1) 256{ 257static struct strbuf buf = STRBUF_INIT; 258 259strbuf_reset(&buf); 260strbuf_addf(&buf,"%s/",get_object_directory()); 261 262fill_sha1_path(&buf, sha1); 263return buf.buf; 264} 265 266struct strbuf *alt_scratch_buf(struct alternate_object_database *alt) 267{ 268strbuf_setlen(&alt->scratch, alt->base_len); 269return&alt->scratch; 270} 271 272static const char*alt_sha1_path(struct alternate_object_database *alt, 273const unsigned char*sha1) 274{ 275struct strbuf *buf =alt_scratch_buf(alt); 276fill_sha1_path(buf, sha1); 277return buf->buf; 278} 279 280/* 281 * Return the name of the pack or index file with the specified sha1 282 * in its filename. *base and *name are scratch space that must be 283 * provided by the caller. which should be "pack" or "idx". 284 */ 285static char*sha1_get_pack_name(const unsigned char*sha1, 286struct strbuf *buf, 287const char*which) 288{ 289strbuf_reset(buf); 290strbuf_addf(buf,"%s/pack/pack-%s.%s",get_object_directory(), 291sha1_to_hex(sha1), which); 292return buf->buf; 293} 294 295char*sha1_pack_name(const unsigned char*sha1) 296{ 297static struct strbuf buf = STRBUF_INIT; 298returnsha1_get_pack_name(sha1, &buf,"pack"); 299} 300 301char*sha1_pack_index_name(const unsigned char*sha1) 302{ 303static struct strbuf buf = STRBUF_INIT; 304returnsha1_get_pack_name(sha1, &buf,"idx"); 305} 306 307struct alternate_object_database *alt_odb_list; 308static struct alternate_object_database **alt_odb_tail; 309 310/* 311 * Return non-zero iff the path is usable as an alternate object database. 312 */ 313static intalt_odb_usable(struct strbuf *path,const char*normalized_objdir) 314{ 315struct alternate_object_database *alt; 316 317/* Detect cases where alternate disappeared */ 318if(!is_directory(path->buf)) { 319error("object directory%sdoes not exist; " 320"check .git/objects/info/alternates.", 321 path->buf); 322return0; 323} 324 325/* 326 * Prevent the common mistake of listing the same 327 * thing twice, or object directory itself. 328 */ 329for(alt = alt_odb_list; alt; alt = alt->next) { 330if(!fspathcmp(path->buf, alt->path)) 331return0; 332} 333if(!fspathcmp(path->buf, normalized_objdir)) 334return0; 335 336return1; 337} 338 339/* 340 * Prepare alternate object database registry. 341 * 342 * The variable alt_odb_list points at the list of struct 343 * alternate_object_database. The elements on this list come from 344 * non-empty elements from colon separated ALTERNATE_DB_ENVIRONMENT 345 * environment variable, and $GIT_OBJECT_DIRECTORY/info/alternates, 346 * whose contents is similar to that environment variable but can be 347 * LF separated. Its base points at a statically allocated buffer that 348 * contains "/the/directory/corresponding/to/.git/objects/...", while 349 * its name points just after the slash at the end of ".git/objects/" 350 * in the example above, and has enough space to hold 40-byte hex 351 * SHA1, an extra slash for the first level indirection, and the 352 * terminating NUL. 353 */ 354static intlink_alt_odb_entry(const char*entry,const char*relative_base, 355int depth,const char*normalized_objdir) 356{ 357struct alternate_object_database *ent; 358struct strbuf pathbuf = STRBUF_INIT; 359 360if(!is_absolute_path(entry) && relative_base) { 361strbuf_realpath(&pathbuf, relative_base,1); 362strbuf_addch(&pathbuf,'/'); 363} 364strbuf_addstr(&pathbuf, entry); 365 366if(strbuf_normalize_path(&pathbuf) <0&& relative_base) { 367error("unable to normalize alternate object path:%s", 368 pathbuf.buf); 369strbuf_release(&pathbuf); 370return-1; 371} 372 373/* 374 * The trailing slash after the directory name is given by 375 * this function at the end. Remove duplicates. 376 */ 377while(pathbuf.len && pathbuf.buf[pathbuf.len -1] =='/') 378strbuf_setlen(&pathbuf, pathbuf.len -1); 379 380if(!alt_odb_usable(&pathbuf, normalized_objdir)) { 381strbuf_release(&pathbuf); 382return-1; 383} 384 385 ent =alloc_alt_odb(pathbuf.buf); 386 387/* add the alternate entry */ 388*alt_odb_tail = ent; 389 alt_odb_tail = &(ent->next); 390 ent->next = NULL; 391 392/* recursively add alternates */ 393read_info_alternates(pathbuf.buf, depth +1); 394 395strbuf_release(&pathbuf); 396return0; 397} 398 399static const char*parse_alt_odb_entry(const char*string, 400int sep, 401struct strbuf *out) 402{ 403const char*end; 404 405strbuf_reset(out); 406 407if(*string =='#') { 408/* comment; consume up to next separator */ 409 end =strchrnul(string, sep); 410}else if(*string =='"'&& !unquote_c_style(out, string, &end)) { 411/* 412 * quoted path; unquote_c_style has copied the 413 * data for us and set "end". Broken quoting (e.g., 414 * an entry that doesn't end with a quote) falls 415 * back to the unquoted case below. 416 */ 417}else{ 418/* normal, unquoted path */ 419 end =strchrnul(string, sep); 420strbuf_add(out, string, end - string); 421} 422 423if(*end) 424 end++; 425return end; 426} 427 428static voidlink_alt_odb_entries(const char*alt,int len,int sep, 429const char*relative_base,int depth) 430{ 431struct strbuf objdirbuf = STRBUF_INIT; 432struct strbuf entry = STRBUF_INIT; 433 434if(depth >5) { 435error("%s: ignoring alternate object stores, nesting too deep.", 436 relative_base); 437return; 438} 439 440strbuf_add_absolute_path(&objdirbuf,get_object_directory()); 441if(strbuf_normalize_path(&objdirbuf) <0) 442die("unable to normalize object directory:%s", 443 objdirbuf.buf); 444 445while(*alt) { 446 alt =parse_alt_odb_entry(alt, sep, &entry); 447if(!entry.len) 448continue; 449link_alt_odb_entry(entry.buf, relative_base, depth, objdirbuf.buf); 450} 451strbuf_release(&entry); 452strbuf_release(&objdirbuf); 453} 454 455voidread_info_alternates(const char* relative_base,int depth) 456{ 457char*map; 458size_t mapsz; 459struct stat st; 460char*path; 461int fd; 462 463 path =xstrfmt("%s/info/alternates", relative_base); 464 fd =git_open(path); 465free(path); 466if(fd <0) 467return; 468if(fstat(fd, &st) || (st.st_size ==0)) { 469close(fd); 470return; 471} 472 mapsz =xsize_t(st.st_size); 473 map =xmmap(NULL, mapsz, PROT_READ, MAP_PRIVATE, fd,0); 474close(fd); 475 476link_alt_odb_entries(map, mapsz,'\n', relative_base, depth); 477 478munmap(map, mapsz); 479} 480 481struct alternate_object_database *alloc_alt_odb(const char*dir) 482{ 483struct alternate_object_database *ent; 484 485FLEX_ALLOC_STR(ent, path, dir); 486strbuf_init(&ent->scratch,0); 487strbuf_addf(&ent->scratch,"%s/", dir); 488 ent->base_len = ent->scratch.len; 489 490return ent; 491} 492 493voidadd_to_alternates_file(const char*reference) 494{ 495struct lock_file *lock =xcalloc(1,sizeof(struct lock_file)); 496char*alts =git_pathdup("objects/info/alternates"); 497FILE*in, *out; 498 499hold_lock_file_for_update(lock, alts, LOCK_DIE_ON_ERROR); 500 out =fdopen_lock_file(lock,"w"); 501if(!out) 502die_errno("unable to fdopen alternates lockfile"); 503 504 in =fopen(alts,"r"); 505if(in) { 506struct strbuf line = STRBUF_INIT; 507int found =0; 508 509while(strbuf_getline(&line, in) != EOF) { 510if(!strcmp(reference, line.buf)) { 511 found =1; 512break; 513} 514fprintf_or_die(out,"%s\n", line.buf); 515} 516 517strbuf_release(&line); 518fclose(in); 519 520if(found) { 521rollback_lock_file(lock); 522 lock = NULL; 523} 524} 525else if(errno != ENOENT) 526die_errno("unable to read alternates file"); 527 528if(lock) { 529fprintf_or_die(out,"%s\n", reference); 530if(commit_lock_file(lock)) 531die_errno("unable to move new alternates file into place"); 532if(alt_odb_tail) 533link_alt_odb_entries(reference,strlen(reference),'\n', NULL,0); 534} 535free(alts); 536} 537 538voidadd_to_alternates_memory(const char*reference) 539{ 540/* 541 * Make sure alternates are initialized, or else our entry may be 542 * overwritten when they are. 543 */ 544prepare_alt_odb(); 545 546link_alt_odb_entries(reference,strlen(reference),'\n', NULL,0); 547} 548 549/* 550 * Compute the exact path an alternate is at and returns it. In case of 551 * error NULL is returned and the human readable error is added to `err` 552 * `path` may be relative and should point to $GITDIR. 553 * `err` must not be null. 554 */ 555char*compute_alternate_path(const char*path,struct strbuf *err) 556{ 557char*ref_git = NULL; 558const char*repo, *ref_git_s; 559int seen_error =0; 560 561 ref_git_s =real_path_if_valid(path); 562if(!ref_git_s) { 563 seen_error =1; 564strbuf_addf(err,_("path '%s' does not exist"), path); 565goto out; 566}else 567/* 568 * Beware: read_gitfile(), real_path() and mkpath() 569 * return static buffer 570 */ 571 ref_git =xstrdup(ref_git_s); 572 573 repo =read_gitfile(ref_git); 574if(!repo) 575 repo =read_gitfile(mkpath("%s/.git", ref_git)); 576if(repo) { 577free(ref_git); 578 ref_git =xstrdup(repo); 579} 580 581if(!repo &&is_directory(mkpath("%s/.git/objects", ref_git))) { 582char*ref_git_git =mkpathdup("%s/.git", ref_git); 583free(ref_git); 584 ref_git = ref_git_git; 585}else if(!is_directory(mkpath("%s/objects", ref_git))) { 586struct strbuf sb = STRBUF_INIT; 587 seen_error =1; 588if(get_common_dir(&sb, ref_git)) { 589strbuf_addf(err, 590_("reference repository '%s' as a linked " 591"checkout is not supported yet."), 592 path); 593goto out; 594} 595 596strbuf_addf(err,_("reference repository '%s' is not a " 597"local repository."), path); 598goto out; 599} 600 601if(!access(mkpath("%s/shallow", ref_git), F_OK)) { 602strbuf_addf(err,_("reference repository '%s' is shallow"), 603 path); 604 seen_error =1; 605goto out; 606} 607 608if(!access(mkpath("%s/info/grafts", ref_git), F_OK)) { 609strbuf_addf(err, 610_("reference repository '%s' is grafted"), 611 path); 612 seen_error =1; 613goto out; 614} 615 616out: 617if(seen_error) { 618free(ref_git); 619 ref_git = NULL; 620} 621 622return ref_git; 623} 624 625intforeach_alt_odb(alt_odb_fn fn,void*cb) 626{ 627struct alternate_object_database *ent; 628int r =0; 629 630prepare_alt_odb(); 631for(ent = alt_odb_list; ent; ent = ent->next) { 632 r =fn(ent, cb); 633if(r) 634break; 635} 636return r; 637} 638 639voidprepare_alt_odb(void) 640{ 641const char*alt; 642 643if(alt_odb_tail) 644return; 645 646 alt =getenv(ALTERNATE_DB_ENVIRONMENT); 647if(!alt) alt =""; 648 649 alt_odb_tail = &alt_odb_list; 650link_alt_odb_entries(alt,strlen(alt), PATH_SEP, NULL,0); 651 652read_info_alternates(get_object_directory(),0); 653} 654 655/* Returns 1 if we have successfully freshened the file, 0 otherwise. */ 656static intfreshen_file(const char*fn) 657{ 658struct utimbuf t; 659 t.actime = t.modtime =time(NULL); 660return!utime(fn, &t); 661} 662 663/* 664 * All of the check_and_freshen functions return 1 if the file exists and was 665 * freshened (if freshening was requested), 0 otherwise. If they return 666 * 0, you should not assume that it is safe to skip a write of the object (it 667 * either does not exist on disk, or has a stale mtime and may be subject to 668 * pruning). 669 */ 670static intcheck_and_freshen_file(const char*fn,int freshen) 671{ 672if(access(fn, F_OK)) 673return0; 674if(freshen && !freshen_file(fn)) 675return0; 676return1; 677} 678 679static intcheck_and_freshen_local(const unsigned char*sha1,int freshen) 680{ 681returncheck_and_freshen_file(sha1_file_name(sha1), freshen); 682} 683 684static intcheck_and_freshen_nonlocal(const unsigned char*sha1,int freshen) 685{ 686struct alternate_object_database *alt; 687prepare_alt_odb(); 688for(alt = alt_odb_list; alt; alt = alt->next) { 689const char*path =alt_sha1_path(alt, sha1); 690if(check_and_freshen_file(path, freshen)) 691return1; 692} 693return0; 694} 695 696static intcheck_and_freshen(const unsigned char*sha1,int freshen) 697{ 698returncheck_and_freshen_local(sha1, freshen) || 699check_and_freshen_nonlocal(sha1, freshen); 700} 701 702inthas_loose_object_nonlocal(const unsigned char*sha1) 703{ 704returncheck_and_freshen_nonlocal(sha1,0); 705} 706 707static inthas_loose_object(const unsigned char*sha1) 708{ 709returncheck_and_freshen(sha1,0); 710} 711 712static unsigned int pack_used_ctr; 713static unsigned int pack_mmap_calls; 714static unsigned int peak_pack_open_windows; 715static unsigned int pack_open_windows; 716static unsigned int pack_open_fds; 717static unsigned int pack_max_fds; 718static size_t peak_pack_mapped; 719static size_t pack_mapped; 720struct packed_git *packed_git; 721 722static struct mru packed_git_mru_storage; 723struct mru *packed_git_mru = &packed_git_mru_storage; 724 725voidpack_report(void) 726{ 727fprintf(stderr, 728"pack_report: getpagesize() =%10" SZ_FMT "\n" 729"pack_report: core.packedGitWindowSize =%10" SZ_FMT "\n" 730"pack_report: core.packedGitLimit =%10" SZ_FMT "\n", 731sz_fmt(getpagesize()), 732sz_fmt(packed_git_window_size), 733sz_fmt(packed_git_limit)); 734fprintf(stderr, 735"pack_report: pack_used_ctr =%10u\n" 736"pack_report: pack_mmap_calls =%10u\n" 737"pack_report: pack_open_windows =%10u /%10u\n" 738"pack_report: pack_mapped = " 739"%10" SZ_FMT " /%10" SZ_FMT "\n", 740 pack_used_ctr, 741 pack_mmap_calls, 742 pack_open_windows, peak_pack_open_windows, 743sz_fmt(pack_mapped),sz_fmt(peak_pack_mapped)); 744} 745 746/* 747 * Open and mmap the index file at path, perform a couple of 748 * consistency checks, then record its information to p. Return 0 on 749 * success. 750 */ 751static intcheck_packed_git_idx(const char*path,struct packed_git *p) 752{ 753void*idx_map; 754struct pack_idx_header *hdr; 755size_t idx_size; 756uint32_t version, nr, i, *index; 757int fd =git_open(path); 758struct stat st; 759 760if(fd <0) 761return-1; 762if(fstat(fd, &st)) { 763close(fd); 764return-1; 765} 766 idx_size =xsize_t(st.st_size); 767if(idx_size <4*256+20+20) { 768close(fd); 769returnerror("index file%sis too small", path); 770} 771 idx_map =xmmap(NULL, idx_size, PROT_READ, MAP_PRIVATE, fd,0); 772close(fd); 773 774 hdr = idx_map; 775if(hdr->idx_signature ==htonl(PACK_IDX_SIGNATURE)) { 776 version =ntohl(hdr->idx_version); 777if(version <2|| version >2) { 778munmap(idx_map, idx_size); 779returnerror("index file%sis version %"PRIu32 780" and is not supported by this binary" 781" (try upgrading GIT to a newer version)", 782 path, version); 783} 784}else 785 version =1; 786 787 nr =0; 788 index = idx_map; 789if(version >1) 790 index +=2;/* skip index header */ 791for(i =0; i <256; i++) { 792uint32_t n =ntohl(index[i]); 793if(n < nr) { 794munmap(idx_map, idx_size); 795returnerror("non-monotonic index%s", path); 796} 797 nr = n; 798} 799 800if(version ==1) { 801/* 802 * Total size: 803 * - 256 index entries 4 bytes each 804 * - 24-byte entries * nr (20-byte sha1 + 4-byte offset) 805 * - 20-byte SHA1 of the packfile 806 * - 20-byte SHA1 file checksum 807 */ 808if(idx_size !=4*256+ nr *24+20+20) { 809munmap(idx_map, idx_size); 810returnerror("wrong index v1 file size in%s", path); 811} 812}else if(version ==2) { 813/* 814 * Minimum size: 815 * - 8 bytes of header 816 * - 256 index entries 4 bytes each 817 * - 20-byte sha1 entry * nr 818 * - 4-byte crc entry * nr 819 * - 4-byte offset entry * nr 820 * - 20-byte SHA1 of the packfile 821 * - 20-byte SHA1 file checksum 822 * And after the 4-byte offset table might be a 823 * variable sized table containing 8-byte entries 824 * for offsets larger than 2^31. 825 */ 826unsigned long min_size =8+4*256+ nr*(20+4+4) +20+20; 827unsigned long max_size = min_size; 828if(nr) 829 max_size += (nr -1)*8; 830if(idx_size < min_size || idx_size > max_size) { 831munmap(idx_map, idx_size); 832returnerror("wrong index v2 file size in%s", path); 833} 834if(idx_size != min_size && 835/* 836 * make sure we can deal with large pack offsets. 837 * 31-bit signed offset won't be enough, neither 838 * 32-bit unsigned one will be. 839 */ 840(sizeof(off_t) <=4)) { 841munmap(idx_map, idx_size); 842returnerror("pack too large for current definition of off_t in%s", path); 843} 844} 845 846 p->index_version = version; 847 p->index_data = idx_map; 848 p->index_size = idx_size; 849 p->num_objects = nr; 850return0; 851} 852 853intopen_pack_index(struct packed_git *p) 854{ 855char*idx_name; 856size_t len; 857int ret; 858 859if(p->index_data) 860return0; 861 862if(!strip_suffix(p->pack_name,".pack", &len)) 863die("BUG: pack_name does not end in .pack"); 864 idx_name =xstrfmt("%.*s.idx", (int)len, p->pack_name); 865 ret =check_packed_git_idx(idx_name, p); 866free(idx_name); 867return ret; 868} 869 870static voidscan_windows(struct packed_git *p, 871struct packed_git **lru_p, 872struct pack_window **lru_w, 873struct pack_window **lru_l) 874{ 875struct pack_window *w, *w_l; 876 877for(w_l = NULL, w = p->windows; w; w = w->next) { 878if(!w->inuse_cnt) { 879if(!*lru_w || w->last_used < (*lru_w)->last_used) { 880*lru_p = p; 881*lru_w = w; 882*lru_l = w_l; 883} 884} 885 w_l = w; 886} 887} 888 889static intunuse_one_window(struct packed_git *current) 890{ 891struct packed_git *p, *lru_p = NULL; 892struct pack_window *lru_w = NULL, *lru_l = NULL; 893 894if(current) 895scan_windows(current, &lru_p, &lru_w, &lru_l); 896for(p = packed_git; p; p = p->next) 897scan_windows(p, &lru_p, &lru_w, &lru_l); 898if(lru_p) { 899munmap(lru_w->base, lru_w->len); 900 pack_mapped -= lru_w->len; 901if(lru_l) 902 lru_l->next = lru_w->next; 903else 904 lru_p->windows = lru_w->next; 905free(lru_w); 906 pack_open_windows--; 907return1; 908} 909return0; 910} 911 912voidrelease_pack_memory(size_t need) 913{ 914size_t cur = pack_mapped; 915while(need >= (cur - pack_mapped) &&unuse_one_window(NULL)) 916;/* nothing */ 917} 918 919static voidmmap_limit_check(size_t length) 920{ 921static size_t limit =0; 922if(!limit) { 923 limit =git_env_ulong("GIT_MMAP_LIMIT",0); 924if(!limit) 925 limit = SIZE_MAX; 926} 927if(length > limit) 928die("attempting to mmap %"PRIuMAX" over limit %"PRIuMAX, 929(uintmax_t)length, (uintmax_t)limit); 930} 931 932void*xmmap_gently(void*start,size_t length, 933int prot,int flags,int fd, off_t offset) 934{ 935void*ret; 936 937mmap_limit_check(length); 938 ret =mmap(start, length, prot, flags, fd, offset); 939if(ret == MAP_FAILED) { 940if(!length) 941return NULL; 942release_pack_memory(length); 943 ret =mmap(start, length, prot, flags, fd, offset); 944} 945return ret; 946} 947 948void*xmmap(void*start,size_t length, 949int prot,int flags,int fd, off_t offset) 950{ 951void*ret =xmmap_gently(start, length, prot, flags, fd, offset); 952if(ret == MAP_FAILED) 953die_errno("mmap failed"); 954return ret; 955} 956 957voidclose_pack_windows(struct packed_git *p) 958{ 959while(p->windows) { 960struct pack_window *w = p->windows; 961 962if(w->inuse_cnt) 963die("pack '%s' still has open windows to it", 964 p->pack_name); 965munmap(w->base, w->len); 966 pack_mapped -= w->len; 967 pack_open_windows--; 968 p->windows = w->next; 969free(w); 970} 971} 972 973static intclose_pack_fd(struct packed_git *p) 974{ 975if(p->pack_fd <0) 976return0; 977 978close(p->pack_fd); 979 pack_open_fds--; 980 p->pack_fd = -1; 981 982return1; 983} 984 985static voidclose_pack(struct packed_git *p) 986{ 987close_pack_windows(p); 988close_pack_fd(p); 989close_pack_index(p); 990} 991 992voidclose_all_packs(void) 993{ 994struct packed_git *p; 995 996for(p = packed_git; p; p = p->next) 997if(p->do_not_close) 998die("BUG: want to close pack marked 'do-not-close'"); 999else1000close_pack(p);1001}100210031004/*1005 * The LRU pack is the one with the oldest MRU window, preferring packs1006 * with no used windows, or the oldest mtime if it has no windows allocated.1007 */1008static voidfind_lru_pack(struct packed_git *p,struct packed_git **lru_p,struct pack_window **mru_w,int*accept_windows_inuse)1009{1010struct pack_window *w, *this_mru_w;1011int has_windows_inuse =0;10121013/*1014 * Reject this pack if it has windows and the previously selected1015 * one does not. If this pack does not have windows, reject1016 * it if the pack file is newer than the previously selected one.1017 */1018if(*lru_p && !*mru_w && (p->windows || p->mtime > (*lru_p)->mtime))1019return;10201021for(w = this_mru_w = p->windows; w; w = w->next) {1022/*1023 * Reject this pack if any of its windows are in use,1024 * but the previously selected pack did not have any1025 * inuse windows. Otherwise, record that this pack1026 * has windows in use.1027 */1028if(w->inuse_cnt) {1029if(*accept_windows_inuse)1030 has_windows_inuse =1;1031else1032return;1033}10341035if(w->last_used > this_mru_w->last_used)1036 this_mru_w = w;10371038/*1039 * Reject this pack if it has windows that have been1040 * used more recently than the previously selected pack.1041 * If the previously selected pack had windows inuse and1042 * we have not encountered a window in this pack that is1043 * inuse, skip this check since we prefer a pack with no1044 * inuse windows to one that has inuse windows.1045 */1046if(*mru_w && *accept_windows_inuse == has_windows_inuse &&1047 this_mru_w->last_used > (*mru_w)->last_used)1048return;1049}10501051/*1052 * Select this pack.1053 */1054*mru_w = this_mru_w;1055*lru_p = p;1056*accept_windows_inuse = has_windows_inuse;1057}10581059static intclose_one_pack(void)1060{1061struct packed_git *p, *lru_p = NULL;1062struct pack_window *mru_w = NULL;1063int accept_windows_inuse =1;10641065for(p = packed_git; p; p = p->next) {1066if(p->pack_fd == -1)1067continue;1068find_lru_pack(p, &lru_p, &mru_w, &accept_windows_inuse);1069}10701071if(lru_p)1072returnclose_pack_fd(lru_p);10731074return0;1075}10761077voidunuse_pack(struct pack_window **w_cursor)1078{1079struct pack_window *w = *w_cursor;1080if(w) {1081 w->inuse_cnt--;1082*w_cursor = NULL;1083}1084}10851086voidclose_pack_index(struct packed_git *p)1087{1088if(p->index_data) {1089munmap((void*)p->index_data, p->index_size);1090 p->index_data = NULL;1091}1092}10931094static unsigned intget_max_fd_limit(void)1095{1096#ifdef RLIMIT_NOFILE1097{1098struct rlimit lim;10991100if(!getrlimit(RLIMIT_NOFILE, &lim))1101return lim.rlim_cur;1102}1103#endif11041105#ifdef _SC_OPEN_MAX1106{1107long open_max =sysconf(_SC_OPEN_MAX);1108if(0< open_max)1109return open_max;1110/*1111 * Otherwise, we got -1 for one of the two1112 * reasons:1113 *1114 * (1) sysconf() did not understand _SC_OPEN_MAX1115 * and signaled an error with -1; or1116 * (2) sysconf() said there is no limit.1117 *1118 * We _could_ clear errno before calling sysconf() to1119 * tell these two cases apart and return a huge number1120 * in the latter case to let the caller cap it to a1121 * value that is not so selfish, but letting the1122 * fallback OPEN_MAX codepath take care of these cases1123 * is a lot simpler.1124 */1125}1126#endif11271128#ifdef OPEN_MAX1129return OPEN_MAX;1130#else1131return1;/* see the caller ;-) */1132#endif1133}11341135/*1136 * Do not call this directly as this leaks p->pack_fd on error return;1137 * call open_packed_git() instead.1138 */1139static intopen_packed_git_1(struct packed_git *p)1140{1141struct stat st;1142struct pack_header hdr;1143unsigned char sha1[20];1144unsigned char*idx_sha1;1145long fd_flag;11461147if(!p->index_data &&open_pack_index(p))1148returnerror("packfile%sindex unavailable", p->pack_name);11491150if(!pack_max_fds) {1151unsigned int max_fds =get_max_fd_limit();11521153/* Save 3 for stdin/stdout/stderr, 22 for work */1154if(25< max_fds)1155 pack_max_fds = max_fds -25;1156else1157 pack_max_fds =1;1158}11591160while(pack_max_fds <= pack_open_fds &&close_one_pack())1161;/* nothing */11621163 p->pack_fd =git_open(p->pack_name);1164if(p->pack_fd <0||fstat(p->pack_fd, &st))1165return-1;1166 pack_open_fds++;11671168/* If we created the struct before we had the pack we lack size. */1169if(!p->pack_size) {1170if(!S_ISREG(st.st_mode))1171returnerror("packfile%snot a regular file", p->pack_name);1172 p->pack_size = st.st_size;1173}else if(p->pack_size != st.st_size)1174returnerror("packfile%ssize changed", p->pack_name);11751176/* We leave these file descriptors open with sliding mmap;1177 * there is no point keeping them open across exec(), though.1178 */1179 fd_flag =fcntl(p->pack_fd, F_GETFD,0);1180if(fd_flag <0)1181returnerror("cannot determine file descriptor flags");1182 fd_flag |= FD_CLOEXEC;1183if(fcntl(p->pack_fd, F_SETFD, fd_flag) == -1)1184returnerror("cannot set FD_CLOEXEC");11851186/* Verify we recognize this pack file format. */1187if(read_in_full(p->pack_fd, &hdr,sizeof(hdr)) !=sizeof(hdr))1188returnerror("file%sis far too short to be a packfile", p->pack_name);1189if(hdr.hdr_signature !=htonl(PACK_SIGNATURE))1190returnerror("file%sis not a GIT packfile", p->pack_name);1191if(!pack_version_ok(hdr.hdr_version))1192returnerror("packfile%sis version %"PRIu32" and not"1193" supported (try upgrading GIT to a newer version)",1194 p->pack_name,ntohl(hdr.hdr_version));11951196/* Verify the pack matches its index. */1197if(p->num_objects !=ntohl(hdr.hdr_entries))1198returnerror("packfile%sclaims to have %"PRIu32" objects"1199" while index indicates %"PRIu32" objects",1200 p->pack_name,ntohl(hdr.hdr_entries),1201 p->num_objects);1202if(lseek(p->pack_fd, p->pack_size -sizeof(sha1), SEEK_SET) == -1)1203returnerror("end of packfile%sis unavailable", p->pack_name);1204if(read_in_full(p->pack_fd, sha1,sizeof(sha1)) !=sizeof(sha1))1205returnerror("packfile%ssignature is unavailable", p->pack_name);1206 idx_sha1 = ((unsigned char*)p->index_data) + p->index_size -40;1207if(hashcmp(sha1, idx_sha1))1208returnerror("packfile%sdoes not match index", p->pack_name);1209return0;1210}12111212static intopen_packed_git(struct packed_git *p)1213{1214if(!open_packed_git_1(p))1215return0;1216close_pack_fd(p);1217return-1;1218}12191220static intin_window(struct pack_window *win, off_t offset)1221{1222/* We must promise at least 20 bytes (one hash) after the1223 * offset is available from this window, otherwise the offset1224 * is not actually in this window and a different window (which1225 * has that one hash excess) must be used. This is to support1226 * the object header and delta base parsing routines below.1227 */1228 off_t win_off = win->offset;1229return win_off <= offset1230&& (offset +20) <= (win_off + win->len);1231}12321233unsigned char*use_pack(struct packed_git *p,1234struct pack_window **w_cursor,1235 off_t offset,1236unsigned long*left)1237{1238struct pack_window *win = *w_cursor;12391240/* Since packfiles end in a hash of their content and it's1241 * pointless to ask for an offset into the middle of that1242 * hash, and the in_window function above wouldn't match1243 * don't allow an offset too close to the end of the file.1244 */1245if(!p->pack_size && p->pack_fd == -1&&open_packed_git(p))1246die("packfile%scannot be accessed", p->pack_name);1247if(offset > (p->pack_size -20))1248die("offset beyond end of packfile (truncated pack?)");1249if(offset <0)1250die(_("offset before end of packfile (broken .idx?)"));12511252if(!win || !in_window(win, offset)) {1253if(win)1254 win->inuse_cnt--;1255for(win = p->windows; win; win = win->next) {1256if(in_window(win, offset))1257break;1258}1259if(!win) {1260size_t window_align = packed_git_window_size /2;1261 off_t len;12621263if(p->pack_fd == -1&&open_packed_git(p))1264die("packfile%scannot be accessed", p->pack_name);12651266 win =xcalloc(1,sizeof(*win));1267 win->offset = (offset / window_align) * window_align;1268 len = p->pack_size - win->offset;1269if(len > packed_git_window_size)1270 len = packed_git_window_size;1271 win->len = (size_t)len;1272 pack_mapped += win->len;1273while(packed_git_limit < pack_mapped1274&&unuse_one_window(p))1275;/* nothing */1276 win->base =xmmap(NULL, win->len,1277 PROT_READ, MAP_PRIVATE,1278 p->pack_fd, win->offset);1279if(win->base == MAP_FAILED)1280die_errno("packfile%scannot be mapped",1281 p->pack_name);1282if(!win->offset && win->len == p->pack_size1283&& !p->do_not_close)1284close_pack_fd(p);1285 pack_mmap_calls++;1286 pack_open_windows++;1287if(pack_mapped > peak_pack_mapped)1288 peak_pack_mapped = pack_mapped;1289if(pack_open_windows > peak_pack_open_windows)1290 peak_pack_open_windows = pack_open_windows;1291 win->next = p->windows;1292 p->windows = win;1293}1294}1295if(win != *w_cursor) {1296 win->last_used = pack_used_ctr++;1297 win->inuse_cnt++;1298*w_cursor = win;1299}1300 offset -= win->offset;1301if(left)1302*left = win->len -xsize_t(offset);1303return win->base + offset;1304}13051306static struct packed_git *alloc_packed_git(int extra)1307{1308struct packed_git *p =xmalloc(st_add(sizeof(*p), extra));1309memset(p,0,sizeof(*p));1310 p->pack_fd = -1;1311return p;1312}13131314static voidtry_to_free_pack_memory(size_t size)1315{1316release_pack_memory(size);1317}13181319struct packed_git *add_packed_git(const char*path,size_t path_len,int local)1320{1321static int have_set_try_to_free_routine;1322struct stat st;1323size_t alloc;1324struct packed_git *p;13251326if(!have_set_try_to_free_routine) {1327 have_set_try_to_free_routine =1;1328set_try_to_free_routine(try_to_free_pack_memory);1329}13301331/*1332 * Make sure a corresponding .pack file exists and that1333 * the index looks sane.1334 */1335if(!strip_suffix_mem(path, &path_len,".idx"))1336return NULL;13371338/*1339 * ".pack" is long enough to hold any suffix we're adding (and1340 * the use xsnprintf double-checks that)1341 */1342 alloc =st_add3(path_len,strlen(".pack"),1);1343 p =alloc_packed_git(alloc);1344memcpy(p->pack_name, path, path_len);13451346xsnprintf(p->pack_name + path_len, alloc - path_len,".keep");1347if(!access(p->pack_name, F_OK))1348 p->pack_keep =1;13491350xsnprintf(p->pack_name + path_len, alloc - path_len,".pack");1351if(stat(p->pack_name, &st) || !S_ISREG(st.st_mode)) {1352free(p);1353return NULL;1354}13551356/* ok, it looks sane as far as we can check without1357 * actually mapping the pack file.1358 */1359 p->pack_size = st.st_size;1360 p->pack_local = local;1361 p->mtime = st.st_mtime;1362if(path_len <40||get_sha1_hex(path + path_len -40, p->sha1))1363hashclr(p->sha1);1364return p;1365}13661367struct packed_git *parse_pack_index(unsigned char*sha1,const char*idx_path)1368{1369const char*path =sha1_pack_name(sha1);1370size_t alloc =st_add(strlen(path),1);1371struct packed_git *p =alloc_packed_git(alloc);13721373memcpy(p->pack_name, path, alloc);/* includes NUL */1374hashcpy(p->sha1, sha1);1375if(check_packed_git_idx(idx_path, p)) {1376free(p);1377return NULL;1378}13791380return p;1381}13821383voidinstall_packed_git(struct packed_git *pack)1384{1385if(pack->pack_fd != -1)1386 pack_open_fds++;13871388 pack->next = packed_git;1389 packed_git = pack;1390}13911392void(*report_garbage)(unsigned seen_bits,const char*path);13931394static voidreport_helper(const struct string_list *list,1395int seen_bits,int first,int last)1396{1397if(seen_bits == (PACKDIR_FILE_PACK|PACKDIR_FILE_IDX))1398return;13991400for(; first < last; first++)1401report_garbage(seen_bits, list->items[first].string);1402}14031404static voidreport_pack_garbage(struct string_list *list)1405{1406int i, baselen = -1, first =0, seen_bits =0;14071408if(!report_garbage)1409return;14101411string_list_sort(list);14121413for(i =0; i < list->nr; i++) {1414const char*path = list->items[i].string;1415if(baselen != -1&&1416strncmp(path, list->items[first].string, baselen)) {1417report_helper(list, seen_bits, first, i);1418 baselen = -1;1419 seen_bits =0;1420}1421if(baselen == -1) {1422const char*dot =strrchr(path,'.');1423if(!dot) {1424report_garbage(PACKDIR_FILE_GARBAGE, path);1425continue;1426}1427 baselen = dot - path +1;1428 first = i;1429}1430if(!strcmp(path + baselen,"pack"))1431 seen_bits |=1;1432else if(!strcmp(path + baselen,"idx"))1433 seen_bits |=2;1434}1435report_helper(list, seen_bits, first, list->nr);1436}14371438static voidprepare_packed_git_one(char*objdir,int local)1439{1440struct strbuf path = STRBUF_INIT;1441size_t dirnamelen;1442DIR*dir;1443struct dirent *de;1444struct string_list garbage = STRING_LIST_INIT_DUP;14451446strbuf_addstr(&path, objdir);1447strbuf_addstr(&path,"/pack");1448 dir =opendir(path.buf);1449if(!dir) {1450if(errno != ENOENT)1451error_errno("unable to open object pack directory:%s",1452 path.buf);1453strbuf_release(&path);1454return;1455}1456strbuf_addch(&path,'/');1457 dirnamelen = path.len;1458while((de =readdir(dir)) != NULL) {1459struct packed_git *p;1460size_t base_len;14611462if(is_dot_or_dotdot(de->d_name))1463continue;14641465strbuf_setlen(&path, dirnamelen);1466strbuf_addstr(&path, de->d_name);14671468 base_len = path.len;1469if(strip_suffix_mem(path.buf, &base_len,".idx")) {1470/* Don't reopen a pack we already have. */1471for(p = packed_git; p; p = p->next) {1472size_t len;1473if(strip_suffix(p->pack_name,".pack", &len) &&1474 len == base_len &&1475!memcmp(p->pack_name, path.buf, len))1476break;1477}1478if(p == NULL &&1479/*1480 * See if it really is a valid .idx file with1481 * corresponding .pack file that we can map.1482 */1483(p =add_packed_git(path.buf, path.len, local)) != NULL)1484install_packed_git(p);1485}14861487if(!report_garbage)1488continue;14891490if(ends_with(de->d_name,".idx") ||1491ends_with(de->d_name,".pack") ||1492ends_with(de->d_name,".bitmap") ||1493ends_with(de->d_name,".keep"))1494string_list_append(&garbage, path.buf);1495else1496report_garbage(PACKDIR_FILE_GARBAGE, path.buf);1497}1498closedir(dir);1499report_pack_garbage(&garbage);1500string_list_clear(&garbage,0);1501strbuf_release(&path);1502}15031504static int approximate_object_count_valid;15051506/*1507 * Give a fast, rough count of the number of objects in the repository. This1508 * ignores loose objects completely. If you have a lot of them, then either1509 * you should repack because your performance will be awful, or they are1510 * all unreachable objects about to be pruned, in which case they're not really1511 * interesting as a measure of repo size in the first place.1512 */1513unsigned longapproximate_object_count(void)1514{1515static unsigned long count;1516if(!approximate_object_count_valid) {1517struct packed_git *p;15181519prepare_packed_git();1520 count =0;1521for(p = packed_git; p; p = p->next) {1522if(open_pack_index(p))1523continue;1524 count += p->num_objects;1525}1526}1527return count;1528}15291530static void*get_next_packed_git(const void*p)1531{1532return((const struct packed_git *)p)->next;1533}15341535static voidset_next_packed_git(void*p,void*next)1536{1537((struct packed_git *)p)->next = next;1538}15391540static intsort_pack(const void*a_,const void*b_)1541{1542const struct packed_git *a = a_;1543const struct packed_git *b = b_;1544int st;15451546/*1547 * Local packs tend to contain objects specific to our1548 * variant of the project than remote ones. In addition,1549 * remote ones could be on a network mounted filesystem.1550 * Favor local ones for these reasons.1551 */1552 st = a->pack_local - b->pack_local;1553if(st)1554return-st;15551556/*1557 * Younger packs tend to contain more recent objects,1558 * and more recent objects tend to get accessed more1559 * often.1560 */1561if(a->mtime < b->mtime)1562return1;1563else if(a->mtime == b->mtime)1564return0;1565return-1;1566}15671568static voidrearrange_packed_git(void)1569{1570 packed_git =llist_mergesort(packed_git, get_next_packed_git,1571 set_next_packed_git, sort_pack);1572}15731574static voidprepare_packed_git_mru(void)1575{1576struct packed_git *p;15771578mru_clear(packed_git_mru);1579for(p = packed_git; p; p = p->next)1580mru_append(packed_git_mru, p);1581}15821583static int prepare_packed_git_run_once =0;1584voidprepare_packed_git(void)1585{1586struct alternate_object_database *alt;15871588if(prepare_packed_git_run_once)1589return;1590prepare_packed_git_one(get_object_directory(),1);1591prepare_alt_odb();1592for(alt = alt_odb_list; alt; alt = alt->next)1593prepare_packed_git_one(alt->path,0);1594rearrange_packed_git();1595prepare_packed_git_mru();1596 prepare_packed_git_run_once =1;1597}15981599voidreprepare_packed_git(void)1600{1601 approximate_object_count_valid =0;1602 prepare_packed_git_run_once =0;1603prepare_packed_git();1604}16051606static voidmark_bad_packed_object(struct packed_git *p,1607const unsigned char*sha1)1608{1609unsigned i;1610for(i =0; i < p->num_bad_objects; i++)1611if(!hashcmp(sha1, p->bad_object_sha1 + GIT_SHA1_RAWSZ * i))1612return;1613 p->bad_object_sha1 =xrealloc(p->bad_object_sha1,1614st_mult(GIT_SHA1_RAWSZ,1615st_add(p->num_bad_objects,1)));1616hashcpy(p->bad_object_sha1 + GIT_SHA1_RAWSZ * p->num_bad_objects, sha1);1617 p->num_bad_objects++;1618}16191620static const struct packed_git *has_packed_and_bad(const unsigned char*sha1)1621{1622struct packed_git *p;1623unsigned i;16241625for(p = packed_git; p; p = p->next)1626for(i =0; i < p->num_bad_objects; i++)1627if(!hashcmp(sha1, p->bad_object_sha1 +20* i))1628return p;1629return NULL;1630}16311632/*1633 * With an in-core object data in "map", rehash it to make sure the1634 * object name actually matches "sha1" to detect object corruption.1635 * With "map" == NULL, try reading the object named with "sha1" using1636 * the streaming interface and rehash it to do the same.1637 */1638intcheck_sha1_signature(const unsigned char*sha1,void*map,1639unsigned long size,const char*type)1640{1641unsigned char real_sha1[20];1642enum object_type obj_type;1643struct git_istream *st;1644 git_SHA_CTX c;1645char hdr[32];1646int hdrlen;16471648if(map) {1649hash_sha1_file(map, size, type, real_sha1);1650returnhashcmp(sha1, real_sha1) ? -1:0;1651}16521653 st =open_istream(sha1, &obj_type, &size, NULL);1654if(!st)1655return-1;16561657/* Generate the header */1658 hdrlen =xsnprintf(hdr,sizeof(hdr),"%s %lu",typename(obj_type), size) +1;16591660/* Sha1.. */1661git_SHA1_Init(&c);1662git_SHA1_Update(&c, hdr, hdrlen);1663for(;;) {1664char buf[1024*16];1665 ssize_t readlen =read_istream(st, buf,sizeof(buf));16661667if(readlen <0) {1668close_istream(st);1669return-1;1670}1671if(!readlen)1672break;1673git_SHA1_Update(&c, buf, readlen);1674}1675git_SHA1_Final(real_sha1, &c);1676close_istream(st);1677returnhashcmp(sha1, real_sha1) ? -1:0;1678}16791680intgit_open_cloexec(const char*name,int flags)1681{1682int fd;1683static int o_cloexec = O_CLOEXEC;16841685 fd =open(name, flags | o_cloexec);1686if((o_cloexec & O_CLOEXEC) && fd <0&& errno == EINVAL) {1687/* Try again w/o O_CLOEXEC: the kernel might not support it */1688 o_cloexec &= ~O_CLOEXEC;1689 fd =open(name, flags | o_cloexec);1690}16911692#if defined(F_GETFL) && defined(F_SETFL) && defined(FD_CLOEXEC)1693{1694static int fd_cloexec = FD_CLOEXEC;16951696if(!o_cloexec &&0<= fd && fd_cloexec) {1697/* Opened w/o O_CLOEXEC? try with fcntl(2) to add it */1698int flags =fcntl(fd, F_GETFL);1699if(fcntl(fd, F_SETFL, flags | fd_cloexec))1700 fd_cloexec =0;1701}1702}1703#endif1704return fd;1705}17061707/*1708 * Find "sha1" as a loose object in the local repository or in an alternate.1709 * Returns 0 on success, negative on failure.1710 *1711 * The "path" out-parameter will give the path of the object we found (if any).1712 * Note that it may point to static storage and is only valid until another1713 * call to sha1_file_name(), etc.1714 */1715static intstat_sha1_file(const unsigned char*sha1,struct stat *st,1716const char**path)1717{1718struct alternate_object_database *alt;17191720*path =sha1_file_name(sha1);1721if(!lstat(*path, st))1722return0;17231724prepare_alt_odb();1725 errno = ENOENT;1726for(alt = alt_odb_list; alt; alt = alt->next) {1727*path =alt_sha1_path(alt, sha1);1728if(!lstat(*path, st))1729return0;1730}17311732return-1;1733}17341735/*1736 * Like stat_sha1_file(), but actually open the object and return the1737 * descriptor. See the caveats on the "path" parameter above.1738 */1739static intopen_sha1_file(const unsigned char*sha1,const char**path)1740{1741int fd;1742struct alternate_object_database *alt;1743int most_interesting_errno;17441745*path =sha1_file_name(sha1);1746 fd =git_open(*path);1747if(fd >=0)1748return fd;1749 most_interesting_errno = errno;17501751prepare_alt_odb();1752for(alt = alt_odb_list; alt; alt = alt->next) {1753*path =alt_sha1_path(alt, sha1);1754 fd =git_open(*path);1755if(fd >=0)1756return fd;1757if(most_interesting_errno == ENOENT)1758 most_interesting_errno = errno;1759}1760 errno = most_interesting_errno;1761return-1;1762}17631764/*1765 * Map the loose object at "path" if it is not NULL, or the path found by1766 * searching for a loose object named "sha1".1767 */1768static void*map_sha1_file_1(const char*path,1769const unsigned char*sha1,1770unsigned long*size)1771{1772void*map;1773int fd;17741775if(path)1776 fd =git_open(path);1777else1778 fd =open_sha1_file(sha1, &path);1779 map = NULL;1780if(fd >=0) {1781struct stat st;17821783if(!fstat(fd, &st)) {1784*size =xsize_t(st.st_size);1785if(!*size) {1786/* mmap() is forbidden on empty files */1787error("object file%sis empty", path);1788return NULL;1789}1790 map =xmmap(NULL, *size, PROT_READ, MAP_PRIVATE, fd,0);1791}1792close(fd);1793}1794return map;1795}17961797void*map_sha1_file(const unsigned char*sha1,unsigned long*size)1798{1799returnmap_sha1_file_1(NULL, sha1, size);1800}18011802unsigned longunpack_object_header_buffer(const unsigned char*buf,1803unsigned long len,enum object_type *type,unsigned long*sizep)1804{1805unsigned shift;1806unsigned long size, c;1807unsigned long used =0;18081809 c = buf[used++];1810*type = (c >>4) &7;1811 size = c &15;1812 shift =4;1813while(c &0x80) {1814if(len <= used ||bitsizeof(long) <= shift) {1815error("bad object header");1816 size = used =0;1817break;1818}1819 c = buf[used++];1820 size += (c &0x7f) << shift;1821 shift +=7;1822}1823*sizep = size;1824return used;1825}18261827static intunpack_sha1_short_header(git_zstream *stream,1828unsigned char*map,unsigned long mapsize,1829void*buffer,unsigned long bufsiz)1830{1831/* Get the data stream */1832memset(stream,0,sizeof(*stream));1833 stream->next_in = map;1834 stream->avail_in = mapsize;1835 stream->next_out = buffer;1836 stream->avail_out = bufsiz;18371838git_inflate_init(stream);1839returngit_inflate(stream,0);1840}18411842intunpack_sha1_header(git_zstream *stream,1843unsigned char*map,unsigned long mapsize,1844void*buffer,unsigned long bufsiz)1845{1846int status =unpack_sha1_short_header(stream, map, mapsize,1847 buffer, bufsiz);18481849if(status < Z_OK)1850return status;18511852/* Make sure we have the terminating NUL */1853if(!memchr(buffer,'\0', stream->next_out - (unsigned char*)buffer))1854return-1;1855return0;1856}18571858static intunpack_sha1_header_to_strbuf(git_zstream *stream,unsigned char*map,1859unsigned long mapsize,void*buffer,1860unsigned long bufsiz,struct strbuf *header)1861{1862int status;18631864 status =unpack_sha1_short_header(stream, map, mapsize, buffer, bufsiz);1865if(status < Z_OK)1866return-1;18671868/*1869 * Check if entire header is unpacked in the first iteration.1870 */1871if(memchr(buffer,'\0', stream->next_out - (unsigned char*)buffer))1872return0;18731874/*1875 * buffer[0..bufsiz] was not large enough. Copy the partial1876 * result out to header, and then append the result of further1877 * reading the stream.1878 */1879strbuf_add(header, buffer, stream->next_out - (unsigned char*)buffer);1880 stream->next_out = buffer;1881 stream->avail_out = bufsiz;18821883do{1884 status =git_inflate(stream,0);1885strbuf_add(header, buffer, stream->next_out - (unsigned char*)buffer);1886if(memchr(buffer,'\0', stream->next_out - (unsigned char*)buffer))1887return0;1888 stream->next_out = buffer;1889 stream->avail_out = bufsiz;1890}while(status != Z_STREAM_END);1891return-1;1892}18931894static void*unpack_sha1_rest(git_zstream *stream,void*buffer,unsigned long size,const unsigned char*sha1)1895{1896int bytes =strlen(buffer) +1;1897unsigned char*buf =xmallocz(size);1898unsigned long n;1899int status = Z_OK;19001901 n = stream->total_out - bytes;1902if(n > size)1903 n = size;1904memcpy(buf, (char*) buffer + bytes, n);1905 bytes = n;1906if(bytes <= size) {1907/*1908 * The above condition must be (bytes <= size), not1909 * (bytes < size). In other words, even though we1910 * expect no more output and set avail_out to zero,1911 * the input zlib stream may have bytes that express1912 * "this concludes the stream", and we *do* want to1913 * eat that input.1914 *1915 * Otherwise we would not be able to test that we1916 * consumed all the input to reach the expected size;1917 * we also want to check that zlib tells us that all1918 * went well with status == Z_STREAM_END at the end.1919 */1920 stream->next_out = buf + bytes;1921 stream->avail_out = size - bytes;1922while(status == Z_OK)1923 status =git_inflate(stream, Z_FINISH);1924}1925if(status == Z_STREAM_END && !stream->avail_in) {1926git_inflate_end(stream);1927return buf;1928}19291930if(status <0)1931error("corrupt loose object '%s'",sha1_to_hex(sha1));1932else if(stream->avail_in)1933error("garbage at end of loose object '%s'",1934sha1_to_hex(sha1));1935free(buf);1936return NULL;1937}19381939/*1940 * We used to just use "sscanf()", but that's actually way1941 * too permissive for what we want to check. So do an anal1942 * object header parse by hand.1943 */1944static intparse_sha1_header_extended(const char*hdr,struct object_info *oi,1945unsigned int flags)1946{1947const char*type_buf = hdr;1948unsigned long size;1949int type, type_len =0;19501951/*1952 * The type can be of any size but is followed by1953 * a space.1954 */1955for(;;) {1956char c = *hdr++;1957if(!c)1958return-1;1959if(c ==' ')1960break;1961 type_len++;1962}19631964 type =type_from_string_gently(type_buf, type_len,1);1965if(oi->typename)1966strbuf_add(oi->typename, type_buf, type_len);1967/*1968 * Set type to 0 if its an unknown object and1969 * we're obtaining the type using '--allow-unknown-type'1970 * option.1971 */1972if((flags & LOOKUP_UNKNOWN_OBJECT) && (type <0))1973 type =0;1974else if(type <0)1975die("invalid object type");1976if(oi->typep)1977*oi->typep = type;19781979/*1980 * The length must follow immediately, and be in canonical1981 * decimal format (ie "010" is not valid).1982 */1983 size = *hdr++ -'0';1984if(size >9)1985return-1;1986if(size) {1987for(;;) {1988unsigned long c = *hdr -'0';1989if(c >9)1990break;1991 hdr++;1992 size = size *10+ c;1993}1994}19951996if(oi->sizep)1997*oi->sizep = size;19981999/*2000 * The length must be followed by a zero byte2001 */2002return*hdr ? -1: type;2003}20042005intparse_sha1_header(const char*hdr,unsigned long*sizep)2006{2007struct object_info oi = OBJECT_INFO_INIT;20082009 oi.sizep = sizep;2010returnparse_sha1_header_extended(hdr, &oi, LOOKUP_REPLACE_OBJECT);2011}20122013static void*unpack_sha1_file(void*map,unsigned long mapsize,enum object_type *type,unsigned long*size,const unsigned char*sha1)2014{2015int ret;2016 git_zstream stream;2017char hdr[8192];20182019 ret =unpack_sha1_header(&stream, map, mapsize, hdr,sizeof(hdr));2020if(ret < Z_OK || (*type =parse_sha1_header(hdr, size)) <0)2021return NULL;20222023returnunpack_sha1_rest(&stream, hdr, *size, sha1);2024}20252026unsigned longget_size_from_delta(struct packed_git *p,2027struct pack_window **w_curs,2028 off_t curpos)2029{2030const unsigned char*data;2031unsigned char delta_head[20], *in;2032 git_zstream stream;2033int st;20342035memset(&stream,0,sizeof(stream));2036 stream.next_out = delta_head;2037 stream.avail_out =sizeof(delta_head);20382039git_inflate_init(&stream);2040do{2041 in =use_pack(p, w_curs, curpos, &stream.avail_in);2042 stream.next_in = in;2043 st =git_inflate(&stream, Z_FINISH);2044 curpos += stream.next_in - in;2045}while((st == Z_OK || st == Z_BUF_ERROR) &&2046 stream.total_out <sizeof(delta_head));2047git_inflate_end(&stream);2048if((st != Z_STREAM_END) && stream.total_out !=sizeof(delta_head)) {2049error("delta data unpack-initial failed");2050return0;2051}20522053/* Examine the initial part of the delta to figure out2054 * the result size.2055 */2056 data = delta_head;20572058/* ignore base size */2059get_delta_hdr_size(&data, delta_head+sizeof(delta_head));20602061/* Read the result size */2062returnget_delta_hdr_size(&data, delta_head+sizeof(delta_head));2063}20642065static off_t get_delta_base(struct packed_git *p,2066struct pack_window **w_curs,2067 off_t *curpos,2068enum object_type type,2069 off_t delta_obj_offset)2070{2071unsigned char*base_info =use_pack(p, w_curs, *curpos, NULL);2072 off_t base_offset;20732074/* use_pack() assured us we have [base_info, base_info + 20)2075 * as a range that we can look at without walking off the2076 * end of the mapped window. Its actually the hash size2077 * that is assured. An OFS_DELTA longer than the hash size2078 * is stupid, as then a REF_DELTA would be smaller to store.2079 */2080if(type == OBJ_OFS_DELTA) {2081unsigned used =0;2082unsigned char c = base_info[used++];2083 base_offset = c &127;2084while(c &128) {2085 base_offset +=1;2086if(!base_offset ||MSB(base_offset,7))2087return0;/* overflow */2088 c = base_info[used++];2089 base_offset = (base_offset <<7) + (c &127);2090}2091 base_offset = delta_obj_offset - base_offset;2092if(base_offset <=0|| base_offset >= delta_obj_offset)2093return0;/* out of bound */2094*curpos += used;2095}else if(type == OBJ_REF_DELTA) {2096/* The base entry _must_ be in the same pack */2097 base_offset =find_pack_entry_one(base_info, p);2098*curpos +=20;2099}else2100die("I am totally screwed");2101return base_offset;2102}21032104/*2105 * Like get_delta_base above, but we return the sha1 instead of the pack2106 * offset. This means it is cheaper for REF deltas (we do not have to do2107 * the final object lookup), but more expensive for OFS deltas (we2108 * have to load the revidx to convert the offset back into a sha1).2109 */2110static const unsigned char*get_delta_base_sha1(struct packed_git *p,2111struct pack_window **w_curs,2112 off_t curpos,2113enum object_type type,2114 off_t delta_obj_offset)2115{2116if(type == OBJ_REF_DELTA) {2117unsigned char*base =use_pack(p, w_curs, curpos, NULL);2118return base;2119}else if(type == OBJ_OFS_DELTA) {2120struct revindex_entry *revidx;2121 off_t base_offset =get_delta_base(p, w_curs, &curpos,2122 type, delta_obj_offset);21232124if(!base_offset)2125return NULL;21262127 revidx =find_pack_revindex(p, base_offset);2128if(!revidx)2129return NULL;21302131returnnth_packed_object_sha1(p, revidx->nr);2132}else2133return NULL;2134}21352136intunpack_object_header(struct packed_git *p,2137struct pack_window **w_curs,2138 off_t *curpos,2139unsigned long*sizep)2140{2141unsigned char*base;2142unsigned long left;2143unsigned long used;2144enum object_type type;21452146/* use_pack() assures us we have [base, base + 20) available2147 * as a range that we can look at. (Its actually the hash2148 * size that is assured.) With our object header encoding2149 * the maximum deflated object size is 2^137, which is just2150 * insane, so we know won't exceed what we have been given.2151 */2152 base =use_pack(p, w_curs, *curpos, &left);2153 used =unpack_object_header_buffer(base, left, &type, sizep);2154if(!used) {2155 type = OBJ_BAD;2156}else2157*curpos += used;21582159return type;2160}21612162static intretry_bad_packed_offset(struct packed_git *p, off_t obj_offset)2163{2164int type;2165struct revindex_entry *revidx;2166const unsigned char*sha1;2167 revidx =find_pack_revindex(p, obj_offset);2168if(!revidx)2169return OBJ_BAD;2170 sha1 =nth_packed_object_sha1(p, revidx->nr);2171mark_bad_packed_object(p, sha1);2172 type =sha1_object_info(sha1, NULL);2173if(type <= OBJ_NONE)2174return OBJ_BAD;2175return type;2176}21772178#define POI_STACK_PREALLOC 6421792180static enum object_type packed_to_object_type(struct packed_git *p,2181 off_t obj_offset,2182enum object_type type,2183struct pack_window **w_curs,2184 off_t curpos)2185{2186 off_t small_poi_stack[POI_STACK_PREALLOC];2187 off_t *poi_stack = small_poi_stack;2188int poi_stack_nr =0, poi_stack_alloc = POI_STACK_PREALLOC;21892190while(type == OBJ_OFS_DELTA || type == OBJ_REF_DELTA) {2191 off_t base_offset;2192unsigned long size;2193/* Push the object we're going to leave behind */2194if(poi_stack_nr >= poi_stack_alloc && poi_stack == small_poi_stack) {2195 poi_stack_alloc =alloc_nr(poi_stack_nr);2196ALLOC_ARRAY(poi_stack, poi_stack_alloc);2197memcpy(poi_stack, small_poi_stack,sizeof(off_t)*poi_stack_nr);2198}else{2199ALLOC_GROW(poi_stack, poi_stack_nr+1, poi_stack_alloc);2200}2201 poi_stack[poi_stack_nr++] = obj_offset;2202/* If parsing the base offset fails, just unwind */2203 base_offset =get_delta_base(p, w_curs, &curpos, type, obj_offset);2204if(!base_offset)2205goto unwind;2206 curpos = obj_offset = base_offset;2207 type =unpack_object_header(p, w_curs, &curpos, &size);2208if(type <= OBJ_NONE) {2209/* If getting the base itself fails, we first2210 * retry the base, otherwise unwind */2211 type =retry_bad_packed_offset(p, base_offset);2212if(type > OBJ_NONE)2213goto out;2214goto unwind;2215}2216}22172218switch(type) {2219case OBJ_BAD:2220case OBJ_COMMIT:2221case OBJ_TREE:2222case OBJ_BLOB:2223case OBJ_TAG:2224break;2225default:2226error("unknown object type%iat offset %"PRIuMAX" in%s",2227 type, (uintmax_t)obj_offset, p->pack_name);2228 type = OBJ_BAD;2229}22302231out:2232if(poi_stack != small_poi_stack)2233free(poi_stack);2234return type;22352236unwind:2237while(poi_stack_nr) {2238 obj_offset = poi_stack[--poi_stack_nr];2239 type =retry_bad_packed_offset(p, obj_offset);2240if(type > OBJ_NONE)2241goto out;2242}2243 type = OBJ_BAD;2244goto out;2245}22462247intpacked_object_info(struct packed_git *p, off_t obj_offset,2248struct object_info *oi)2249{2250struct pack_window *w_curs = NULL;2251unsigned long size;2252 off_t curpos = obj_offset;2253enum object_type type;22542255/*2256 * We always get the representation type, but only convert it to2257 * a "real" type later if the caller is interested.2258 */2259 type =unpack_object_header(p, &w_curs, &curpos, &size);22602261if(oi->sizep) {2262if(type == OBJ_OFS_DELTA || type == OBJ_REF_DELTA) {2263 off_t tmp_pos = curpos;2264 off_t base_offset =get_delta_base(p, &w_curs, &tmp_pos,2265 type, obj_offset);2266if(!base_offset) {2267 type = OBJ_BAD;2268goto out;2269}2270*oi->sizep =get_size_from_delta(p, &w_curs, tmp_pos);2271if(*oi->sizep ==0) {2272 type = OBJ_BAD;2273goto out;2274}2275}else{2276*oi->sizep = size;2277}2278}22792280if(oi->disk_sizep) {2281struct revindex_entry *revidx =find_pack_revindex(p, obj_offset);2282*oi->disk_sizep = revidx[1].offset - obj_offset;2283}22842285if(oi->typep) {2286*oi->typep =packed_to_object_type(p, obj_offset, type, &w_curs, curpos);2287if(*oi->typep <0) {2288 type = OBJ_BAD;2289goto out;2290}2291}22922293if(oi->delta_base_sha1) {2294if(type == OBJ_OFS_DELTA || type == OBJ_REF_DELTA) {2295const unsigned char*base;22962297 base =get_delta_base_sha1(p, &w_curs, curpos,2298 type, obj_offset);2299if(!base) {2300 type = OBJ_BAD;2301goto out;2302}23032304hashcpy(oi->delta_base_sha1, base);2305}else2306hashclr(oi->delta_base_sha1);2307}23082309out:2310unuse_pack(&w_curs);2311return type;2312}23132314static void*unpack_compressed_entry(struct packed_git *p,2315struct pack_window **w_curs,2316 off_t curpos,2317unsigned long size)2318{2319int st;2320 git_zstream stream;2321unsigned char*buffer, *in;23222323 buffer =xmallocz_gently(size);2324if(!buffer)2325return NULL;2326memset(&stream,0,sizeof(stream));2327 stream.next_out = buffer;2328 stream.avail_out = size +1;23292330git_inflate_init(&stream);2331do{2332 in =use_pack(p, w_curs, curpos, &stream.avail_in);2333 stream.next_in = in;2334 st =git_inflate(&stream, Z_FINISH);2335if(!stream.avail_out)2336break;/* the payload is larger than it should be */2337 curpos += stream.next_in - in;2338}while(st == Z_OK || st == Z_BUF_ERROR);2339git_inflate_end(&stream);2340if((st != Z_STREAM_END) || stream.total_out != size) {2341free(buffer);2342return NULL;2343}23442345return buffer;2346}23472348static struct hashmap delta_base_cache;2349static size_t delta_base_cached;23502351staticLIST_HEAD(delta_base_cache_lru);23522353struct delta_base_cache_key {2354struct packed_git *p;2355 off_t base_offset;2356};23572358struct delta_base_cache_entry {2359struct hashmap hash;2360struct delta_base_cache_key key;2361struct list_head lru;2362void*data;2363unsigned long size;2364enum object_type type;2365};23662367static unsigned intpack_entry_hash(struct packed_git *p, off_t base_offset)2368{2369unsigned int hash;23702371 hash = (unsigned int)(intptr_t)p + (unsigned int)base_offset;2372 hash += (hash >>8) + (hash >>16);2373return hash;2374}23752376static struct delta_base_cache_entry *2377get_delta_base_cache_entry(struct packed_git *p, off_t base_offset)2378{2379struct hashmap_entry entry;2380struct delta_base_cache_key key;23812382if(!delta_base_cache.cmpfn)2383return NULL;23842385hashmap_entry_init(&entry,pack_entry_hash(p, base_offset));2386 key.p = p;2387 key.base_offset = base_offset;2388returnhashmap_get(&delta_base_cache, &entry, &key);2389}23902391static intdelta_base_cache_key_eq(const struct delta_base_cache_key *a,2392const struct delta_base_cache_key *b)2393{2394return a->p == b->p && a->base_offset == b->base_offset;2395}23962397static intdelta_base_cache_hash_cmp(const void*va,const void*vb,2398const void*vkey)2399{2400const struct delta_base_cache_entry *a = va, *b = vb;2401const struct delta_base_cache_key *key = vkey;2402if(key)2403return!delta_base_cache_key_eq(&a->key, key);2404else2405return!delta_base_cache_key_eq(&a->key, &b->key);2406}24072408static intin_delta_base_cache(struct packed_git *p, off_t base_offset)2409{2410return!!get_delta_base_cache_entry(p, base_offset);2411}24122413/*2414 * Remove the entry from the cache, but do _not_ free the associated2415 * entry data. The caller takes ownership of the "data" buffer, and2416 * should copy out any fields it wants before detaching.2417 */2418static voiddetach_delta_base_cache_entry(struct delta_base_cache_entry *ent)2419{2420hashmap_remove(&delta_base_cache, ent, &ent->key);2421list_del(&ent->lru);2422 delta_base_cached -= ent->size;2423free(ent);2424}24252426static void*cache_or_unpack_entry(struct packed_git *p, off_t base_offset,2427unsigned long*base_size,enum object_type *type)2428{2429struct delta_base_cache_entry *ent;24302431 ent =get_delta_base_cache_entry(p, base_offset);2432if(!ent)2433returnunpack_entry(p, base_offset, type, base_size);24342435*type = ent->type;2436*base_size = ent->size;2437returnxmemdupz(ent->data, ent->size);2438}24392440staticinlinevoidrelease_delta_base_cache(struct delta_base_cache_entry *ent)2441{2442free(ent->data);2443detach_delta_base_cache_entry(ent);2444}24452446voidclear_delta_base_cache(void)2447{2448struct list_head *lru, *tmp;2449list_for_each_safe(lru, tmp, &delta_base_cache_lru) {2450struct delta_base_cache_entry *entry =2451list_entry(lru,struct delta_base_cache_entry, lru);2452release_delta_base_cache(entry);2453}2454}24552456static voidadd_delta_base_cache(struct packed_git *p, off_t base_offset,2457void*base,unsigned long base_size,enum object_type type)2458{2459struct delta_base_cache_entry *ent =xmalloc(sizeof(*ent));2460struct list_head *lru, *tmp;24612462 delta_base_cached += base_size;24632464list_for_each_safe(lru, tmp, &delta_base_cache_lru) {2465struct delta_base_cache_entry *f =2466list_entry(lru,struct delta_base_cache_entry, lru);2467if(delta_base_cached <= delta_base_cache_limit)2468break;2469release_delta_base_cache(f);2470}24712472 ent->key.p = p;2473 ent->key.base_offset = base_offset;2474 ent->type = type;2475 ent->data = base;2476 ent->size = base_size;2477list_add_tail(&ent->lru, &delta_base_cache_lru);24782479if(!delta_base_cache.cmpfn)2480hashmap_init(&delta_base_cache, delta_base_cache_hash_cmp,0);2481hashmap_entry_init(ent,pack_entry_hash(p, base_offset));2482hashmap_add(&delta_base_cache, ent);2483}24842485static void*read_object(const unsigned char*sha1,enum object_type *type,2486unsigned long*size);24872488static voidwrite_pack_access_log(struct packed_git *p, off_t obj_offset)2489{2490static struct trace_key pack_access =TRACE_KEY_INIT(PACK_ACCESS);2491trace_printf_key(&pack_access,"%s%"PRIuMAX"\n",2492 p->pack_name, (uintmax_t)obj_offset);2493}24942495int do_check_packed_object_crc;24962497#define UNPACK_ENTRY_STACK_PREALLOC 642498struct unpack_entry_stack_ent {2499 off_t obj_offset;2500 off_t curpos;2501unsigned long size;2502};25032504void*unpack_entry(struct packed_git *p, off_t obj_offset,2505enum object_type *final_type,unsigned long*final_size)2506{2507struct pack_window *w_curs = NULL;2508 off_t curpos = obj_offset;2509void*data = NULL;2510unsigned long size;2511enum object_type type;2512struct unpack_entry_stack_ent small_delta_stack[UNPACK_ENTRY_STACK_PREALLOC];2513struct unpack_entry_stack_ent *delta_stack = small_delta_stack;2514int delta_stack_nr =0, delta_stack_alloc = UNPACK_ENTRY_STACK_PREALLOC;2515int base_from_cache =0;25162517write_pack_access_log(p, obj_offset);25182519/* PHASE 1: drill down to the innermost base object */2520for(;;) {2521 off_t base_offset;2522int i;2523struct delta_base_cache_entry *ent;25242525 ent =get_delta_base_cache_entry(p, curpos);2526if(ent) {2527 type = ent->type;2528 data = ent->data;2529 size = ent->size;2530detach_delta_base_cache_entry(ent);2531 base_from_cache =1;2532break;2533}25342535if(do_check_packed_object_crc && p->index_version >1) {2536struct revindex_entry *revidx =find_pack_revindex(p, obj_offset);2537 off_t len = revidx[1].offset - obj_offset;2538if(check_pack_crc(p, &w_curs, obj_offset, len, revidx->nr)) {2539const unsigned char*sha1 =2540nth_packed_object_sha1(p, revidx->nr);2541error("bad packed object CRC for%s",2542sha1_to_hex(sha1));2543mark_bad_packed_object(p, sha1);2544unuse_pack(&w_curs);2545return NULL;2546}2547}25482549 type =unpack_object_header(p, &w_curs, &curpos, &size);2550if(type != OBJ_OFS_DELTA && type != OBJ_REF_DELTA)2551break;25522553 base_offset =get_delta_base(p, &w_curs, &curpos, type, obj_offset);2554if(!base_offset) {2555error("failed to validate delta base reference "2556"at offset %"PRIuMAX" from%s",2557(uintmax_t)curpos, p->pack_name);2558/* bail to phase 2, in hopes of recovery */2559 data = NULL;2560break;2561}25622563/* push object, proceed to base */2564if(delta_stack_nr >= delta_stack_alloc2565&& delta_stack == small_delta_stack) {2566 delta_stack_alloc =alloc_nr(delta_stack_nr);2567ALLOC_ARRAY(delta_stack, delta_stack_alloc);2568memcpy(delta_stack, small_delta_stack,2569sizeof(*delta_stack)*delta_stack_nr);2570}else{2571ALLOC_GROW(delta_stack, delta_stack_nr+1, delta_stack_alloc);2572}2573 i = delta_stack_nr++;2574 delta_stack[i].obj_offset = obj_offset;2575 delta_stack[i].curpos = curpos;2576 delta_stack[i].size = size;25772578 curpos = obj_offset = base_offset;2579}25802581/* PHASE 2: handle the base */2582switch(type) {2583case OBJ_OFS_DELTA:2584case OBJ_REF_DELTA:2585if(data)2586die("BUG: unpack_entry: left loop at a valid delta");2587break;2588case OBJ_COMMIT:2589case OBJ_TREE:2590case OBJ_BLOB:2591case OBJ_TAG:2592if(!base_from_cache)2593 data =unpack_compressed_entry(p, &w_curs, curpos, size);2594break;2595default:2596 data = NULL;2597error("unknown object type%iat offset %"PRIuMAX" in%s",2598 type, (uintmax_t)obj_offset, p->pack_name);2599}26002601/* PHASE 3: apply deltas in order */26022603/* invariants:2604 * 'data' holds the base data, or NULL if there was corruption2605 */2606while(delta_stack_nr) {2607void*delta_data;2608void*base = data;2609unsigned long delta_size, base_size = size;2610int i;26112612 data = NULL;26132614if(base)2615add_delta_base_cache(p, obj_offset, base, base_size, type);26162617if(!base) {2618/*2619 * We're probably in deep shit, but let's try to fetch2620 * the required base anyway from another pack or loose.2621 * This is costly but should happen only in the presence2622 * of a corrupted pack, and is better than failing outright.2623 */2624struct revindex_entry *revidx;2625const unsigned char*base_sha1;2626 revidx =find_pack_revindex(p, obj_offset);2627if(revidx) {2628 base_sha1 =nth_packed_object_sha1(p, revidx->nr);2629error("failed to read delta base object%s"2630" at offset %"PRIuMAX" from%s",2631sha1_to_hex(base_sha1), (uintmax_t)obj_offset,2632 p->pack_name);2633mark_bad_packed_object(p, base_sha1);2634 base =read_object(base_sha1, &type, &base_size);2635}2636}26372638 i = --delta_stack_nr;2639 obj_offset = delta_stack[i].obj_offset;2640 curpos = delta_stack[i].curpos;2641 delta_size = delta_stack[i].size;26422643if(!base)2644continue;26452646 delta_data =unpack_compressed_entry(p, &w_curs, curpos, delta_size);26472648if(!delta_data) {2649error("failed to unpack compressed delta "2650"at offset %"PRIuMAX" from%s",2651(uintmax_t)curpos, p->pack_name);2652 data = NULL;2653continue;2654}26552656 data =patch_delta(base, base_size,2657 delta_data, delta_size,2658&size);26592660/*2661 * We could not apply the delta; warn the user, but keep going.2662 * Our failure will be noticed either in the next iteration of2663 * the loop, or if this is the final delta, in the caller when2664 * we return NULL. Those code paths will take care of making2665 * a more explicit warning and retrying with another copy of2666 * the object.2667 */2668if(!data)2669error("failed to apply delta");26702671free(delta_data);2672}26732674*final_type = type;2675*final_size = size;26762677unuse_pack(&w_curs);26782679if(delta_stack != small_delta_stack)2680free(delta_stack);26812682return data;2683}26842685const unsigned char*nth_packed_object_sha1(struct packed_git *p,2686uint32_t n)2687{2688const unsigned char*index = p->index_data;2689if(!index) {2690if(open_pack_index(p))2691return NULL;2692 index = p->index_data;2693}2694if(n >= p->num_objects)2695return NULL;2696 index +=4*256;2697if(p->index_version ==1) {2698return index +24* n +4;2699}else{2700 index +=8;2701return index +20* n;2702}2703}27042705voidcheck_pack_index_ptr(const struct packed_git *p,const void*vptr)2706{2707const unsigned char*ptr = vptr;2708const unsigned char*start = p->index_data;2709const unsigned char*end = start + p->index_size;2710if(ptr < start)2711die(_("offset before start of pack index for%s(corrupt index?)"),2712 p->pack_name);2713/* No need to check for underflow; .idx files must be at least 8 bytes */2714if(ptr >= end -8)2715die(_("offset beyond end of pack index for%s(truncated index?)"),2716 p->pack_name);2717}27182719off_t nth_packed_object_offset(const struct packed_git *p,uint32_t n)2720{2721const unsigned char*index = p->index_data;2722 index +=4*256;2723if(p->index_version ==1) {2724returnntohl(*((uint32_t*)(index +24* n)));2725}else{2726uint32_t off;2727 index +=8+ p->num_objects * (20+4);2728 off =ntohl(*((uint32_t*)(index +4* n)));2729if(!(off &0x80000000))2730return off;2731 index += p->num_objects *4+ (off &0x7fffffff) *8;2732check_pack_index_ptr(p, index);2733return(((uint64_t)ntohl(*((uint32_t*)(index +0)))) <<32) |2734ntohl(*((uint32_t*)(index +4)));2735}2736}27372738off_t find_pack_entry_one(const unsigned char*sha1,2739struct packed_git *p)2740{2741const uint32_t*level1_ofs = p->index_data;2742const unsigned char*index = p->index_data;2743unsigned hi, lo, stride;2744static int use_lookup = -1;2745static int debug_lookup = -1;27462747if(debug_lookup <0)2748 debug_lookup = !!getenv("GIT_DEBUG_LOOKUP");27492750if(!index) {2751if(open_pack_index(p))2752return0;2753 level1_ofs = p->index_data;2754 index = p->index_data;2755}2756if(p->index_version >1) {2757 level1_ofs +=2;2758 index +=8;2759}2760 index +=4*256;2761 hi =ntohl(level1_ofs[*sha1]);2762 lo = ((*sha1 ==0x0) ?0:ntohl(level1_ofs[*sha1 -1]));2763if(p->index_version >1) {2764 stride =20;2765}else{2766 stride =24;2767 index +=4;2768}27692770if(debug_lookup)2771printf("%02x%02x%02x... lo%uhi%unr %"PRIu32"\n",2772 sha1[0], sha1[1], sha1[2], lo, hi, p->num_objects);27732774if(use_lookup <0)2775 use_lookup = !!getenv("GIT_USE_LOOKUP");2776if(use_lookup) {2777int pos =sha1_entry_pos(index, stride,0,2778 lo, hi, p->num_objects, sha1);2779if(pos <0)2780return0;2781returnnth_packed_object_offset(p, pos);2782}27832784do{2785unsigned mi = (lo + hi) /2;2786int cmp =hashcmp(index + mi * stride, sha1);27872788if(debug_lookup)2789printf("lo%uhi%urg%umi%u\n",2790 lo, hi, hi - lo, mi);2791if(!cmp)2792returnnth_packed_object_offset(p, mi);2793if(cmp >0)2794 hi = mi;2795else2796 lo = mi+1;2797}while(lo < hi);2798return0;2799}28002801intis_pack_valid(struct packed_git *p)2802{2803/* An already open pack is known to be valid. */2804if(p->pack_fd != -1)2805return1;28062807/* If the pack has one window completely covering the2808 * file size, the pack is known to be valid even if2809 * the descriptor is not currently open.2810 */2811if(p->windows) {2812struct pack_window *w = p->windows;28132814if(!w->offset && w->len == p->pack_size)2815return1;2816}28172818/* Force the pack to open to prove its valid. */2819return!open_packed_git(p);2820}28212822static intfill_pack_entry(const unsigned char*sha1,2823struct pack_entry *e,2824struct packed_git *p)2825{2826 off_t offset;28272828if(p->num_bad_objects) {2829unsigned i;2830for(i =0; i < p->num_bad_objects; i++)2831if(!hashcmp(sha1, p->bad_object_sha1 +20* i))2832return0;2833}28342835 offset =find_pack_entry_one(sha1, p);2836if(!offset)2837return0;28382839/*2840 * We are about to tell the caller where they can locate the2841 * requested object. We better make sure the packfile is2842 * still here and can be accessed before supplying that2843 * answer, as it may have been deleted since the index was2844 * loaded!2845 */2846if(!is_pack_valid(p))2847return0;2848 e->offset = offset;2849 e->p = p;2850hashcpy(e->sha1, sha1);2851return1;2852}28532854/*2855 * Iff a pack file contains the object named by sha1, return true and2856 * store its location to e.2857 */2858static intfind_pack_entry(const unsigned char*sha1,struct pack_entry *e)2859{2860struct mru_entry *p;28612862prepare_packed_git();2863if(!packed_git)2864return0;28652866for(p = packed_git_mru->head; p; p = p->next) {2867if(fill_pack_entry(sha1, e, p->item)) {2868mru_mark(packed_git_mru, p);2869return1;2870}2871}2872return0;2873}28742875struct packed_git *find_sha1_pack(const unsigned char*sha1,2876struct packed_git *packs)2877{2878struct packed_git *p;28792880for(p = packs; p; p = p->next) {2881if(find_pack_entry_one(sha1, p))2882return p;2883}2884return NULL;28852886}28872888static intsha1_loose_object_info(const unsigned char*sha1,2889struct object_info *oi,2890int flags)2891{2892int status =0;2893unsigned long mapsize;2894void*map;2895 git_zstream stream;2896char hdr[32];2897struct strbuf hdrbuf = STRBUF_INIT;28982899if(oi->delta_base_sha1)2900hashclr(oi->delta_base_sha1);29012902/*2903 * If we don't care about type or size, then we don't2904 * need to look inside the object at all. Note that we2905 * do not optimize out the stat call, even if the2906 * caller doesn't care about the disk-size, since our2907 * return value implicitly indicates whether the2908 * object even exists.2909 */2910if(!oi->typep && !oi->typename && !oi->sizep) {2911const char*path;2912struct stat st;2913if(stat_sha1_file(sha1, &st, &path) <0)2914return-1;2915if(oi->disk_sizep)2916*oi->disk_sizep = st.st_size;2917return0;2918}29192920 map =map_sha1_file(sha1, &mapsize);2921if(!map)2922return-1;2923if(oi->disk_sizep)2924*oi->disk_sizep = mapsize;2925if((flags & LOOKUP_UNKNOWN_OBJECT)) {2926if(unpack_sha1_header_to_strbuf(&stream, map, mapsize, hdr,sizeof(hdr), &hdrbuf) <0)2927 status =error("unable to unpack%sheader with --allow-unknown-type",2928sha1_to_hex(sha1));2929}else if(unpack_sha1_header(&stream, map, mapsize, hdr,sizeof(hdr)) <0)2930 status =error("unable to unpack%sheader",2931sha1_to_hex(sha1));2932if(status <0)2933;/* Do nothing */2934else if(hdrbuf.len) {2935if((status =parse_sha1_header_extended(hdrbuf.buf, oi, flags)) <0)2936 status =error("unable to parse%sheader with --allow-unknown-type",2937sha1_to_hex(sha1));2938}else if((status =parse_sha1_header_extended(hdr, oi, flags)) <0)2939 status =error("unable to parse%sheader",sha1_to_hex(sha1));2940git_inflate_end(&stream);2941munmap(map, mapsize);2942if(status && oi->typep)2943*oi->typep = status;2944strbuf_release(&hdrbuf);2945return0;2946}29472948intsha1_object_info_extended(const unsigned char*sha1,struct object_info *oi,unsigned flags)2949{2950struct cached_object *co;2951struct pack_entry e;2952int rtype;2953enum object_type real_type;2954const unsigned char*real =lookup_replace_object_extended(sha1, flags);29552956 co =find_cached_object(real);2957if(co) {2958if(oi->typep)2959*(oi->typep) = co->type;2960if(oi->sizep)2961*(oi->sizep) = co->size;2962if(oi->disk_sizep)2963*(oi->disk_sizep) =0;2964if(oi->delta_base_sha1)2965hashclr(oi->delta_base_sha1);2966if(oi->typename)2967strbuf_addstr(oi->typename,typename(co->type));2968 oi->whence = OI_CACHED;2969return0;2970}29712972if(!find_pack_entry(real, &e)) {2973/* Most likely it's a loose object. */2974if(!sha1_loose_object_info(real, oi, flags)) {2975 oi->whence = OI_LOOSE;2976return0;2977}29782979/* Not a loose object; someone else may have just packed it. */2980reprepare_packed_git();2981if(!find_pack_entry(real, &e))2982return-1;2983}29842985/*2986 * packed_object_info() does not follow the delta chain to2987 * find out the real type, unless it is given oi->typep.2988 */2989if(oi->typename && !oi->typep)2990 oi->typep = &real_type;29912992 rtype =packed_object_info(e.p, e.offset, oi);2993if(rtype <0) {2994mark_bad_packed_object(e.p, real);2995if(oi->typep == &real_type)2996 oi->typep = NULL;2997returnsha1_object_info_extended(real, oi,0);2998}else if(in_delta_base_cache(e.p, e.offset)) {2999 oi->whence = OI_DBCACHED;3000}else{3001 oi->whence = OI_PACKED;3002 oi->u.packed.offset = e.offset;3003 oi->u.packed.pack = e.p;3004 oi->u.packed.is_delta = (rtype == OBJ_REF_DELTA ||3005 rtype == OBJ_OFS_DELTA);3006}3007if(oi->typename)3008strbuf_addstr(oi->typename,typename(*oi->typep));3009if(oi->typep == &real_type)3010 oi->typep = NULL;30113012return0;3013}30143015/* returns enum object_type or negative */3016intsha1_object_info(const unsigned char*sha1,unsigned long*sizep)3017{3018enum object_type type;3019struct object_info oi = OBJECT_INFO_INIT;30203021 oi.typep = &type;3022 oi.sizep = sizep;3023if(sha1_object_info_extended(sha1, &oi, LOOKUP_REPLACE_OBJECT) <0)3024return-1;3025return type;3026}30273028static void*read_packed_sha1(const unsigned char*sha1,3029enum object_type *type,unsigned long*size)3030{3031struct pack_entry e;3032void*data;30333034if(!find_pack_entry(sha1, &e))3035return NULL;3036 data =cache_or_unpack_entry(e.p, e.offset, size, type);3037if(!data) {3038/*3039 * We're probably in deep shit, but let's try to fetch3040 * the required object anyway from another pack or loose.3041 * This should happen only in the presence of a corrupted3042 * pack, and is better than failing outright.3043 */3044error("failed to read object%sat offset %"PRIuMAX" from%s",3045sha1_to_hex(sha1), (uintmax_t)e.offset, e.p->pack_name);3046mark_bad_packed_object(e.p, sha1);3047 data =read_object(sha1, type, size);3048}3049return data;3050}30513052intpretend_sha1_file(void*buf,unsigned long len,enum object_type type,3053unsigned char*sha1)3054{3055struct cached_object *co;30563057hash_sha1_file(buf, len,typename(type), sha1);3058if(has_sha1_file(sha1) ||find_cached_object(sha1))3059return0;3060ALLOC_GROW(cached_objects, cached_object_nr +1, cached_object_alloc);3061 co = &cached_objects[cached_object_nr++];3062 co->size = len;3063 co->type = type;3064 co->buf =xmalloc(len);3065memcpy(co->buf, buf, len);3066hashcpy(co->sha1, sha1);3067return0;3068}30693070static void*read_object(const unsigned char*sha1,enum object_type *type,3071unsigned long*size)3072{3073unsigned long mapsize;3074void*map, *buf;3075struct cached_object *co;30763077 co =find_cached_object(sha1);3078if(co) {3079*type = co->type;3080*size = co->size;3081returnxmemdupz(co->buf, co->size);3082}30833084 buf =read_packed_sha1(sha1, type, size);3085if(buf)3086return buf;3087 map =map_sha1_file(sha1, &mapsize);3088if(map) {3089 buf =unpack_sha1_file(map, mapsize, type, size, sha1);3090munmap(map, mapsize);3091return buf;3092}3093reprepare_packed_git();3094returnread_packed_sha1(sha1, type, size);3095}30963097/*3098 * This function dies on corrupt objects; the callers who want to3099 * deal with them should arrange to call read_object() and give error3100 * messages themselves.3101 */3102void*read_sha1_file_extended(const unsigned char*sha1,3103enum object_type *type,3104unsigned long*size,3105unsigned flag)3106{3107void*data;3108const struct packed_git *p;3109const char*path;3110struct stat st;3111const unsigned char*repl =lookup_replace_object_extended(sha1, flag);31123113 errno =0;3114 data =read_object(repl, type, size);3115if(data)3116return data;31173118if(errno && errno != ENOENT)3119die_errno("failed to read object%s",sha1_to_hex(sha1));31203121/* die if we replaced an object with one that does not exist */3122if(repl != sha1)3123die("replacement%snot found for%s",3124sha1_to_hex(repl),sha1_to_hex(sha1));31253126if(!stat_sha1_file(repl, &st, &path))3127die("loose object%s(stored in%s) is corrupt",3128sha1_to_hex(repl), path);31293130if((p =has_packed_and_bad(repl)) != NULL)3131die("packed object%s(stored in%s) is corrupt",3132sha1_to_hex(repl), p->pack_name);31333134return NULL;3135}31363137void*read_object_with_reference(const unsigned char*sha1,3138const char*required_type_name,3139unsigned long*size,3140unsigned char*actual_sha1_return)3141{3142enum object_type type, required_type;3143void*buffer;3144unsigned long isize;3145unsigned char actual_sha1[20];31463147 required_type =type_from_string(required_type_name);3148hashcpy(actual_sha1, sha1);3149while(1) {3150int ref_length = -1;3151const char*ref_type = NULL;31523153 buffer =read_sha1_file(actual_sha1, &type, &isize);3154if(!buffer)3155return NULL;3156if(type == required_type) {3157*size = isize;3158if(actual_sha1_return)3159hashcpy(actual_sha1_return, actual_sha1);3160return buffer;3161}3162/* Handle references */3163else if(type == OBJ_COMMIT)3164 ref_type ="tree ";3165else if(type == OBJ_TAG)3166 ref_type ="object ";3167else{3168free(buffer);3169return NULL;3170}3171 ref_length =strlen(ref_type);31723173if(ref_length +40> isize ||3174memcmp(buffer, ref_type, ref_length) ||3175get_sha1_hex((char*) buffer + ref_length, actual_sha1)) {3176free(buffer);3177return NULL;3178}3179free(buffer);3180/* Now we have the ID of the referred-to object in3181 * actual_sha1. Check again. */3182}3183}31843185static voidwrite_sha1_file_prepare(const void*buf,unsigned long len,3186const char*type,unsigned char*sha1,3187char*hdr,int*hdrlen)3188{3189 git_SHA_CTX c;31903191/* Generate the header */3192*hdrlen =xsnprintf(hdr, *hdrlen,"%s %lu", type, len)+1;31933194/* Sha1.. */3195git_SHA1_Init(&c);3196git_SHA1_Update(&c, hdr, *hdrlen);3197git_SHA1_Update(&c, buf, len);3198git_SHA1_Final(sha1, &c);3199}32003201/*3202 * Move the just written object into its final resting place.3203 */3204intfinalize_object_file(const char*tmpfile,const char*filename)3205{3206int ret =0;32073208if(object_creation_mode == OBJECT_CREATION_USES_RENAMES)3209goto try_rename;3210else if(link(tmpfile, filename))3211 ret = errno;32123213/*3214 * Coda hack - coda doesn't like cross-directory links,3215 * so we fall back to a rename, which will mean that it3216 * won't be able to check collisions, but that's not a3217 * big deal.3218 *3219 * The same holds for FAT formatted media.3220 *3221 * When this succeeds, we just return. We have nothing3222 * left to unlink.3223 */3224if(ret && ret != EEXIST) {3225 try_rename:3226if(!rename(tmpfile, filename))3227goto out;3228 ret = errno;3229}3230unlink_or_warn(tmpfile);3231if(ret) {3232if(ret != EEXIST) {3233returnerror_errno("unable to write sha1 filename%s", filename);3234}3235/* FIXME!!! Collision check here ? */3236}32373238out:3239if(adjust_shared_perm(filename))3240returnerror("unable to set permission to '%s'", filename);3241return0;3242}32433244static intwrite_buffer(int fd,const void*buf,size_t len)3245{3246if(write_in_full(fd, buf, len) <0)3247returnerror_errno("file write error");3248return0;3249}32503251inthash_sha1_file(const void*buf,unsigned long len,const char*type,3252unsigned char*sha1)3253{3254char hdr[32];3255int hdrlen =sizeof(hdr);3256write_sha1_file_prepare(buf, len, type, sha1, hdr, &hdrlen);3257return0;3258}32593260/* Finalize a file on disk, and close it. */3261static voidclose_sha1_file(int fd)3262{3263if(fsync_object_files)3264fsync_or_die(fd,"sha1 file");3265if(close(fd) !=0)3266die_errno("error when closing sha1 file");3267}32683269/* Size of directory component, including the ending '/' */3270staticinlineintdirectory_size(const char*filename)3271{3272const char*s =strrchr(filename,'/');3273if(!s)3274return0;3275return s - filename +1;3276}32773278/*3279 * This creates a temporary file in the same directory as the final3280 * 'filename'3281 *3282 * We want to avoid cross-directory filename renames, because those3283 * can have problems on various filesystems (FAT, NFS, Coda).3284 */3285static intcreate_tmpfile(struct strbuf *tmp,const char*filename)3286{3287int fd, dirlen =directory_size(filename);32883289strbuf_reset(tmp);3290strbuf_add(tmp, filename, dirlen);3291strbuf_addstr(tmp,"tmp_obj_XXXXXX");3292 fd =git_mkstemp_mode(tmp->buf,0444);3293if(fd <0&& dirlen && errno == ENOENT) {3294/*3295 * Make sure the directory exists; note that the contents3296 * of the buffer are undefined after mkstemp returns an3297 * error, so we have to rewrite the whole buffer from3298 * scratch.3299 */3300strbuf_reset(tmp);3301strbuf_add(tmp, filename, dirlen -1);3302if(mkdir(tmp->buf,0777) && errno != EEXIST)3303return-1;3304if(adjust_shared_perm(tmp->buf))3305return-1;33063307/* Try again */3308strbuf_addstr(tmp,"/tmp_obj_XXXXXX");3309 fd =git_mkstemp_mode(tmp->buf,0444);3310}3311return fd;3312}33133314static intwrite_loose_object(const unsigned char*sha1,char*hdr,int hdrlen,3315const void*buf,unsigned long len,time_t mtime)3316{3317int fd, ret;3318unsigned char compressed[4096];3319 git_zstream stream;3320 git_SHA_CTX c;3321unsigned char parano_sha1[20];3322static struct strbuf tmp_file = STRBUF_INIT;3323const char*filename =sha1_file_name(sha1);33243325 fd =create_tmpfile(&tmp_file, filename);3326if(fd <0) {3327if(errno == EACCES)3328returnerror("insufficient permission for adding an object to repository database%s",get_object_directory());3329else3330returnerror_errno("unable to create temporary file");3331}33323333/* Set it up */3334git_deflate_init(&stream, zlib_compression_level);3335 stream.next_out = compressed;3336 stream.avail_out =sizeof(compressed);3337git_SHA1_Init(&c);33383339/* First header.. */3340 stream.next_in = (unsigned char*)hdr;3341 stream.avail_in = hdrlen;3342while(git_deflate(&stream,0) == Z_OK)3343;/* nothing */3344git_SHA1_Update(&c, hdr, hdrlen);33453346/* Then the data itself.. */3347 stream.next_in = (void*)buf;3348 stream.avail_in = len;3349do{3350unsigned char*in0 = stream.next_in;3351 ret =git_deflate(&stream, Z_FINISH);3352git_SHA1_Update(&c, in0, stream.next_in - in0);3353if(write_buffer(fd, compressed, stream.next_out - compressed) <0)3354die("unable to write sha1 file");3355 stream.next_out = compressed;3356 stream.avail_out =sizeof(compressed);3357}while(ret == Z_OK);33583359if(ret != Z_STREAM_END)3360die("unable to deflate new object%s(%d)",sha1_to_hex(sha1), ret);3361 ret =git_deflate_end_gently(&stream);3362if(ret != Z_OK)3363die("deflateEnd on object%sfailed (%d)",sha1_to_hex(sha1), ret);3364git_SHA1_Final(parano_sha1, &c);3365if(hashcmp(sha1, parano_sha1) !=0)3366die("confused by unstable object source data for%s",sha1_to_hex(sha1));33673368close_sha1_file(fd);33693370if(mtime) {3371struct utimbuf utb;3372 utb.actime = mtime;3373 utb.modtime = mtime;3374if(utime(tmp_file.buf, &utb) <0)3375warning_errno("failed utime() on%s", tmp_file.buf);3376}33773378returnfinalize_object_file(tmp_file.buf, filename);3379}33803381static intfreshen_loose_object(const unsigned char*sha1)3382{3383returncheck_and_freshen(sha1,1);3384}33853386static intfreshen_packed_object(const unsigned char*sha1)3387{3388struct pack_entry e;3389if(!find_pack_entry(sha1, &e))3390return0;3391if(e.p->freshened)3392return1;3393if(!freshen_file(e.p->pack_name))3394return0;3395 e.p->freshened =1;3396return1;3397}33983399intwrite_sha1_file(const void*buf,unsigned long len,const char*type,unsigned char*sha1)3400{3401char hdr[32];3402int hdrlen =sizeof(hdr);34033404/* Normally if we have it in the pack then we do not bother writing3405 * it out into .git/objects/??/?{38} file.3406 */3407write_sha1_file_prepare(buf, len, type, sha1, hdr, &hdrlen);3408if(freshen_packed_object(sha1) ||freshen_loose_object(sha1))3409return0;3410returnwrite_loose_object(sha1, hdr, hdrlen, buf, len,0);3411}34123413inthash_sha1_file_literally(const void*buf,unsigned long len,const char*type,3414unsigned char*sha1,unsigned flags)3415{3416char*header;3417int hdrlen, status =0;34183419/* type string, SP, %lu of the length plus NUL must fit this */3420 hdrlen =strlen(type) +32;3421 header =xmalloc(hdrlen);3422write_sha1_file_prepare(buf, len, type, sha1, header, &hdrlen);34233424if(!(flags & HASH_WRITE_OBJECT))3425goto cleanup;3426if(freshen_packed_object(sha1) ||freshen_loose_object(sha1))3427goto cleanup;3428 status =write_loose_object(sha1, header, hdrlen, buf, len,0);34293430cleanup:3431free(header);3432return status;3433}34343435intforce_object_loose(const unsigned char*sha1,time_t mtime)3436{3437void*buf;3438unsigned long len;3439enum object_type type;3440char hdr[32];3441int hdrlen;3442int ret;34433444if(has_loose_object(sha1))3445return0;3446 buf =read_packed_sha1(sha1, &type, &len);3447if(!buf)3448returnerror("cannot read sha1_file for%s",sha1_to_hex(sha1));3449 hdrlen =xsnprintf(hdr,sizeof(hdr),"%s %lu",typename(type), len) +1;3450 ret =write_loose_object(sha1, hdr, hdrlen, buf, len, mtime);3451free(buf);34523453return ret;3454}34553456inthas_pack_index(const unsigned char*sha1)3457{3458struct stat st;3459if(stat(sha1_pack_index_name(sha1), &st))3460return0;3461return1;3462}34633464inthas_sha1_pack(const unsigned char*sha1)3465{3466struct pack_entry e;3467returnfind_pack_entry(sha1, &e);3468}34693470inthas_sha1_file_with_flags(const unsigned char*sha1,int flags)3471{3472struct pack_entry e;34733474if(find_pack_entry(sha1, &e))3475return1;3476if(has_loose_object(sha1))3477return1;3478if(flags & HAS_SHA1_QUICK)3479return0;3480reprepare_packed_git();3481returnfind_pack_entry(sha1, &e);3482}34833484inthas_object_file(const struct object_id *oid)3485{3486returnhas_sha1_file(oid->hash);3487}34883489inthas_object_file_with_flags(const struct object_id *oid,int flags)3490{3491returnhas_sha1_file_with_flags(oid->hash, flags);3492}34933494static voidcheck_tree(const void*buf,size_t size)3495{3496struct tree_desc desc;3497struct name_entry entry;34983499init_tree_desc(&desc, buf, size);3500while(tree_entry(&desc, &entry))3501/* do nothing3502 * tree_entry() will die() on malformed entries */3503;3504}35053506static voidcheck_commit(const void*buf,size_t size)3507{3508struct commit c;3509memset(&c,0,sizeof(c));3510if(parse_commit_buffer(&c, buf, size))3511die("corrupt commit");3512}35133514static voidcheck_tag(const void*buf,size_t size)3515{3516struct tag t;3517memset(&t,0,sizeof(t));3518if(parse_tag_buffer(&t, buf, size))3519die("corrupt tag");3520}35213522static intindex_mem(unsigned char*sha1,void*buf,size_t size,3523enum object_type type,3524const char*path,unsigned flags)3525{3526int ret, re_allocated =0;3527int write_object = flags & HASH_WRITE_OBJECT;35283529if(!type)3530 type = OBJ_BLOB;35313532/*3533 * Convert blobs to git internal format3534 */3535if((type == OBJ_BLOB) && path) {3536struct strbuf nbuf = STRBUF_INIT;3537if(convert_to_git(path, buf, size, &nbuf,3538 write_object ? safe_crlf : SAFE_CRLF_FALSE)) {3539 buf =strbuf_detach(&nbuf, &size);3540 re_allocated =1;3541}3542}3543if(flags & HASH_FORMAT_CHECK) {3544if(type == OBJ_TREE)3545check_tree(buf, size);3546if(type == OBJ_COMMIT)3547check_commit(buf, size);3548if(type == OBJ_TAG)3549check_tag(buf, size);3550}35513552if(write_object)3553 ret =write_sha1_file(buf, size,typename(type), sha1);3554else3555 ret =hash_sha1_file(buf, size,typename(type), sha1);3556if(re_allocated)3557free(buf);3558return ret;3559}35603561static intindex_stream_convert_blob(unsigned char*sha1,int fd,3562const char*path,unsigned flags)3563{3564int ret;3565const int write_object = flags & HASH_WRITE_OBJECT;3566struct strbuf sbuf = STRBUF_INIT;35673568assert(path);3569assert(would_convert_to_git_filter_fd(path));35703571convert_to_git_filter_fd(path, fd, &sbuf,3572 write_object ? safe_crlf : SAFE_CRLF_FALSE);35733574if(write_object)3575 ret =write_sha1_file(sbuf.buf, sbuf.len,typename(OBJ_BLOB),3576 sha1);3577else3578 ret =hash_sha1_file(sbuf.buf, sbuf.len,typename(OBJ_BLOB),3579 sha1);3580strbuf_release(&sbuf);3581return ret;3582}35833584static intindex_pipe(unsigned char*sha1,int fd,enum object_type type,3585const char*path,unsigned flags)3586{3587struct strbuf sbuf = STRBUF_INIT;3588int ret;35893590if(strbuf_read(&sbuf, fd,4096) >=0)3591 ret =index_mem(sha1, sbuf.buf, sbuf.len, type, path, flags);3592else3593 ret = -1;3594strbuf_release(&sbuf);3595return ret;3596}35973598#define SMALL_FILE_SIZE (32*1024)35993600static intindex_core(unsigned char*sha1,int fd,size_t size,3601enum object_type type,const char*path,3602unsigned flags)3603{3604int ret;36053606if(!size) {3607 ret =index_mem(sha1,"", size, type, path, flags);3608}else if(size <= SMALL_FILE_SIZE) {3609char*buf =xmalloc(size);3610if(size ==read_in_full(fd, buf, size))3611 ret =index_mem(sha1, buf, size, type, path, flags);3612else3613 ret =error_errno("short read");3614free(buf);3615}else{3616void*buf =xmmap(NULL, size, PROT_READ, MAP_PRIVATE, fd,0);3617 ret =index_mem(sha1, buf, size, type, path, flags);3618munmap(buf, size);3619}3620return ret;3621}36223623/*3624 * This creates one packfile per large blob unless bulk-checkin3625 * machinery is "plugged".3626 *3627 * This also bypasses the usual "convert-to-git" dance, and that is on3628 * purpose. We could write a streaming version of the converting3629 * functions and insert that before feeding the data to fast-import3630 * (or equivalent in-core API described above). However, that is3631 * somewhat complicated, as we do not know the size of the filter3632 * result, which we need to know beforehand when writing a git object.3633 * Since the primary motivation for trying to stream from the working3634 * tree file and to avoid mmaping it in core is to deal with large3635 * binary blobs, they generally do not want to get any conversion, and3636 * callers should avoid this code path when filters are requested.3637 */3638static intindex_stream(unsigned char*sha1,int fd,size_t size,3639enum object_type type,const char*path,3640unsigned flags)3641{3642returnindex_bulk_checkin(sha1, fd, size, type, path, flags);3643}36443645intindex_fd(unsigned char*sha1,int fd,struct stat *st,3646enum object_type type,const char*path,unsigned flags)3647{3648int ret;36493650/*3651 * Call xsize_t() only when needed to avoid potentially unnecessary3652 * die() for large files.3653 */3654if(type == OBJ_BLOB && path &&would_convert_to_git_filter_fd(path))3655 ret =index_stream_convert_blob(sha1, fd, path, flags);3656else if(!S_ISREG(st->st_mode))3657 ret =index_pipe(sha1, fd, type, path, flags);3658else if(st->st_size <= big_file_threshold || type != OBJ_BLOB ||3659(path &&would_convert_to_git(path)))3660 ret =index_core(sha1, fd,xsize_t(st->st_size), type, path,3661 flags);3662else3663 ret =index_stream(sha1, fd,xsize_t(st->st_size), type, path,3664 flags);3665close(fd);3666return ret;3667}36683669intindex_path(unsigned char*sha1,const char*path,struct stat *st,unsigned flags)3670{3671int fd;3672struct strbuf sb = STRBUF_INIT;36733674switch(st->st_mode & S_IFMT) {3675case S_IFREG:3676 fd =open(path, O_RDONLY);3677if(fd <0)3678returnerror_errno("open(\"%s\")", path);3679if(index_fd(sha1, fd, st, OBJ_BLOB, path, flags) <0)3680returnerror("%s: failed to insert into database",3681 path);3682break;3683case S_IFLNK:3684if(strbuf_readlink(&sb, path, st->st_size))3685returnerror_errno("readlink(\"%s\")", path);3686if(!(flags & HASH_WRITE_OBJECT))3687hash_sha1_file(sb.buf, sb.len, blob_type, sha1);3688else if(write_sha1_file(sb.buf, sb.len, blob_type, sha1))3689returnerror("%s: failed to insert into database",3690 path);3691strbuf_release(&sb);3692break;3693case S_IFDIR:3694returnresolve_gitlink_ref(path,"HEAD", sha1);3695default:3696returnerror("%s: unsupported file type", path);3697}3698return0;3699}37003701intread_pack_header(int fd,struct pack_header *header)3702{3703if(read_in_full(fd, header,sizeof(*header)) <sizeof(*header))3704/* "eof before pack header was fully read" */3705return PH_ERROR_EOF;37063707if(header->hdr_signature !=htonl(PACK_SIGNATURE))3708/* "protocol error (pack signature mismatch detected)" */3709return PH_ERROR_PACK_SIGNATURE;3710if(!pack_version_ok(header->hdr_version))3711/* "protocol error (pack version unsupported)" */3712return PH_ERROR_PROTOCOL;3713return0;3714}37153716voidassert_sha1_type(const unsigned char*sha1,enum object_type expect)3717{3718enum object_type type =sha1_object_info(sha1, NULL);3719if(type <0)3720die("%sis not a valid object",sha1_to_hex(sha1));3721if(type != expect)3722die("%sis not a valid '%s' object",sha1_to_hex(sha1),3723typename(expect));3724}37253726static intfor_each_file_in_obj_subdir(int subdir_nr,3727struct strbuf *path,3728 each_loose_object_fn obj_cb,3729 each_loose_cruft_fn cruft_cb,3730 each_loose_subdir_fn subdir_cb,3731void*data)3732{3733size_t baselen = path->len;3734DIR*dir =opendir(path->buf);3735struct dirent *de;3736int r =0;37373738if(!dir) {3739if(errno == ENOENT)3740return0;3741returnerror_errno("unable to open%s", path->buf);3742}37433744while((de =readdir(dir))) {3745if(is_dot_or_dotdot(de->d_name))3746continue;37473748strbuf_setlen(path, baselen);3749strbuf_addf(path,"/%s", de->d_name);37503751if(strlen(de->d_name) ==38) {3752char hex[41];3753unsigned char sha1[20];37543755snprintf(hex,sizeof(hex),"%02x%s",3756 subdir_nr, de->d_name);3757if(!get_sha1_hex(hex, sha1)) {3758if(obj_cb) {3759 r =obj_cb(sha1, path->buf, data);3760if(r)3761break;3762}3763continue;3764}3765}37663767if(cruft_cb) {3768 r =cruft_cb(de->d_name, path->buf, data);3769if(r)3770break;3771}3772}3773closedir(dir);37743775strbuf_setlen(path, baselen);3776if(!r && subdir_cb)3777 r =subdir_cb(subdir_nr, path->buf, data);37783779return r;3780}37813782intfor_each_loose_file_in_objdir_buf(struct strbuf *path,3783 each_loose_object_fn obj_cb,3784 each_loose_cruft_fn cruft_cb,3785 each_loose_subdir_fn subdir_cb,3786void*data)3787{3788size_t baselen = path->len;3789int r =0;3790int i;37913792for(i =0; i <256; i++) {3793strbuf_addf(path,"/%02x", i);3794 r =for_each_file_in_obj_subdir(i, path, obj_cb, cruft_cb,3795 subdir_cb, data);3796strbuf_setlen(path, baselen);3797if(r)3798break;3799}38003801return r;3802}38033804intfor_each_loose_file_in_objdir(const char*path,3805 each_loose_object_fn obj_cb,3806 each_loose_cruft_fn cruft_cb,3807 each_loose_subdir_fn subdir_cb,3808void*data)3809{3810struct strbuf buf = STRBUF_INIT;3811int r;38123813strbuf_addstr(&buf, path);3814 r =for_each_loose_file_in_objdir_buf(&buf, obj_cb, cruft_cb,3815 subdir_cb, data);3816strbuf_release(&buf);38173818return r;3819}38203821struct loose_alt_odb_data {3822 each_loose_object_fn *cb;3823void*data;3824};38253826static intloose_from_alt_odb(struct alternate_object_database *alt,3827void*vdata)3828{3829struct loose_alt_odb_data *data = vdata;3830struct strbuf buf = STRBUF_INIT;3831int r;38323833strbuf_addstr(&buf, alt->path);3834 r =for_each_loose_file_in_objdir_buf(&buf,3835 data->cb, NULL, NULL,3836 data->data);3837strbuf_release(&buf);3838return r;3839}38403841intfor_each_loose_object(each_loose_object_fn cb,void*data,unsigned flags)3842{3843struct loose_alt_odb_data alt;3844int r;38453846 r =for_each_loose_file_in_objdir(get_object_directory(),3847 cb, NULL, NULL, data);3848if(r)3849return r;38503851if(flags & FOR_EACH_OBJECT_LOCAL_ONLY)3852return0;38533854 alt.cb = cb;3855 alt.data = data;3856returnforeach_alt_odb(loose_from_alt_odb, &alt);3857}38583859static intfor_each_object_in_pack(struct packed_git *p, each_packed_object_fn cb,void*data)3860{3861uint32_t i;3862int r =0;38633864for(i =0; i < p->num_objects; i++) {3865const unsigned char*sha1 =nth_packed_object_sha1(p, i);38663867if(!sha1)3868returnerror("unable to get sha1 of object%uin%s",3869 i, p->pack_name);38703871 r =cb(sha1, p, i, data);3872if(r)3873break;3874}3875return r;3876}38773878intfor_each_packed_object(each_packed_object_fn cb,void*data,unsigned flags)3879{3880struct packed_git *p;3881int r =0;3882int pack_errors =0;38833884prepare_packed_git();3885for(p = packed_git; p; p = p->next) {3886if((flags & FOR_EACH_OBJECT_LOCAL_ONLY) && !p->pack_local)3887continue;3888if(open_pack_index(p)) {3889 pack_errors =1;3890continue;3891}3892 r =for_each_object_in_pack(p, cb, data);3893if(r)3894break;3895}3896return r ? r : pack_errors;3897}38983899static intcheck_stream_sha1(git_zstream *stream,3900const char*hdr,3901unsigned long size,3902const char*path,3903const unsigned char*expected_sha1)3904{3905 git_SHA_CTX c;3906unsigned char real_sha1[GIT_SHA1_RAWSZ];3907unsigned char buf[4096];3908unsigned long total_read;3909int status = Z_OK;39103911git_SHA1_Init(&c);3912git_SHA1_Update(&c, hdr, stream->total_out);39133914/*3915 * We already read some bytes into hdr, but the ones up to the NUL3916 * do not count against the object's content size.3917 */3918 total_read = stream->total_out -strlen(hdr) -1;39193920/*3921 * This size comparison must be "<=" to read the final zlib packets;3922 * see the comment in unpack_sha1_rest for details.3923 */3924while(total_read <= size &&3925(status == Z_OK || status == Z_BUF_ERROR)) {3926 stream->next_out = buf;3927 stream->avail_out =sizeof(buf);3928if(size - total_read < stream->avail_out)3929 stream->avail_out = size - total_read;3930 status =git_inflate(stream, Z_FINISH);3931git_SHA1_Update(&c, buf, stream->next_out - buf);3932 total_read += stream->next_out - buf;3933}3934git_inflate_end(stream);39353936if(status != Z_STREAM_END) {3937error("corrupt loose object '%s'",sha1_to_hex(expected_sha1));3938return-1;3939}3940if(stream->avail_in) {3941error("garbage at end of loose object '%s'",3942sha1_to_hex(expected_sha1));3943return-1;3944}39453946git_SHA1_Final(real_sha1, &c);3947if(hashcmp(expected_sha1, real_sha1)) {3948error("sha1 mismatch for%s(expected%s)", path,3949sha1_to_hex(expected_sha1));3950return-1;3951}39523953return0;3954}39553956intread_loose_object(const char*path,3957const unsigned char*expected_sha1,3958enum object_type *type,3959unsigned long*size,3960void**contents)3961{3962int ret = -1;3963int fd = -1;3964void*map = NULL;3965unsigned long mapsize;3966 git_zstream stream;3967char hdr[32];39683969*contents = NULL;39703971 map =map_sha1_file_1(path, NULL, &mapsize);3972if(!map) {3973error_errno("unable to mmap%s", path);3974goto out;3975}39763977if(unpack_sha1_header(&stream, map, mapsize, hdr,sizeof(hdr)) <0) {3978error("unable to unpack header of%s", path);3979goto out;3980}39813982*type =parse_sha1_header(hdr, size);3983if(*type <0) {3984error("unable to parse header of%s", path);3985git_inflate_end(&stream);3986goto out;3987}39883989if(*type == OBJ_BLOB) {3990if(check_stream_sha1(&stream, hdr, *size, path, expected_sha1) <0)3991goto out;3992}else{3993*contents =unpack_sha1_rest(&stream, hdr, *size, expected_sha1);3994if(!*contents) {3995error("unable to unpack contents of%s", path);3996git_inflate_end(&stream);3997goto out;3998}3999if(check_sha1_signature(expected_sha1, *contents,4000*size,typename(*type))) {4001error("sha1 mismatch for%s(expected%s)", path,4002sha1_to_hex(expected_sha1));4003free(*contents);4004goto out;4005}4006}40074008 ret =0;/* everything checks out */40094010out:4011if(map)4012munmap(map, mapsize);4013if(fd >=0)4014close(fd);4015return ret;4016}