From: Junio C Hamano Date: Sun, 27 Aug 2017 05:55:09 +0000 (-0700) Subject: Merge branch 'jt/packmigrate' X-Git-Tag: v2.15.0-rc0~115 X-Git-Url: https://git.lorimer.id.au/gitweb.git/diff_plain/eabdcd4ab4b04af8f39e5ea3847b6d364acb874e?ds=inline;hp=-c Merge branch 'jt/packmigrate' Code movement to make it easier to hack later. * jt/packmigrate: (23 commits) pack: move for_each_packed_object() pack: move has_pack_index() pack: move has_sha1_pack() pack: move find_pack_entry() and make it global pack: move find_sha1_pack() pack: move find_pack_entry_one(), is_pack_valid() pack: move check_pack_index_ptr(), nth_packed_object_offset() pack: move nth_packed_object_{sha1,oid} pack: move clear_delta_base_cache(), packed_object_info(), unpack_entry() pack: move unpack_object_header() pack: move get_size_from_delta() pack: move unpack_object_header_buffer() pack: move {,re}prepare_packed_git and approximate_object_count pack: move install_packed_git() pack: move add_packed_git() pack: move unuse_pack() pack: move use_pack() pack: move pack-closing functions pack: move release_pack_memory() pack: move open_pack_index(), parse_pack_index() ... --- eabdcd4ab4b04af8f39e5ea3847b6d364acb874e diff --combined Makefile index ffab6f4568,79550f6dde..f2bb7f2f63 --- a/Makefile +++ b/Makefile @@@ -655,7 -655,6 +655,7 @@@ TEST_PROGRAMS_NEED_X += test-parse-opti TEST_PROGRAMS_NEED_X += test-path-utils TEST_PROGRAMS_NEED_X += test-prio-queue TEST_PROGRAMS_NEED_X += test-read-cache +TEST_PROGRAMS_NEED_X += test-write-cache TEST_PROGRAMS_NEED_X += test-ref-store TEST_PROGRAMS_NEED_X += test-regex TEST_PROGRAMS_NEED_X += test-revision-walking @@@ -817,6 -816,7 +817,7 @@@ LIB_OBJS += notes-merge. LIB_OBJS += notes-utils.o LIB_OBJS += object.o LIB_OBJS += oidset.o + LIB_OBJS += packfile.o LIB_OBJS += pack-bitmap.o LIB_OBJS += pack-bitmap-write.o LIB_OBJS += pack-check.o @@@ -2039,6 -2039,7 +2040,6 @@@ XDIFF_OBJS += xdiff/xhistogram. VCSSVN_OBJS += vcs-svn/line_buffer.o VCSSVN_OBJS += vcs-svn/sliding_window.o -VCSSVN_OBJS += vcs-svn/repo_tree.o VCSSVN_OBJS += vcs-svn/fast_export.o VCSSVN_OBJS += vcs-svn/svndiff.o VCSSVN_OBJS += vcs-svn/svndump.o diff --combined builtin/fetch.c index 132e3224ed,08e094bf12..225c734924 --- a/builtin/fetch.c +++ b/builtin/fetch.c @@@ -17,6 -17,7 +17,7 @@@ #include "connected.h" #include "argv-array.h" #include "utf8.h" + #include "packfile.h" static const char * const builtin_fetch_usage[] = { N_("git fetch [] [ [...]]"), @@@ -1360,6 -1361,11 +1361,6 @@@ int cmd_fetch(int argc, const char **ar if (depth || deepen_since || deepen_not.nr) deepen = 1; - if (recurse_submodules != RECURSE_SUBMODULES_OFF) { - gitmodules_config(); - git_config(submodule_config, NULL); - } - if (all) { if (argc == 1) die(_("fetch --all does not take a repository argument")); diff --combined builtin/fsck.c index 0ab13848a4,338f3ce20b..1e4c471b41 --- a/builtin/fsck.c +++ b/builtin/fsck.c @@@ -15,6 -15,7 +15,7 @@@ #include "progress.h" #include "streaming.h" #include "decorate.h" + #include "packfile.h" #define REACHABLE 0x0001 #define SEEN 0x0002 @@@ -179,7 -180,7 +180,7 @@@ static int traverse_reachable(void unsigned int nr = 0; int result = 0; if (show_progress) - progress = start_progress_delay(_("Checking connectivity"), 0, 0, 2); + progress = start_delayed_progress(_("Checking connectivity"), 0); while (pending.nr) { struct object_array_entry *entry; struct object *obj; diff --combined builtin/merge.c index cc57052993,dfd6830602..7b7320dede --- a/builtin/merge.c +++ b/builtin/merge.c @@@ -32,6 -32,7 +32,7 @@@ #include "gpg-interface.h" #include "sequencer.h" #include "string-list.h" + #include "packfile.h" #define DEFAULT_TWOHEAD (1<<0) #define DEFAULT_OCTOPUS (1<<1) @@@ -70,7 -71,6 +71,7 @@@ static int continue_current_merge static int allow_unrelated_histories; static int show_progress = -1; static int default_to_upstream = 1; +static int signoff; static const char *sign_commit; static struct strategy all_strategy[] = { @@@ -234,7 -234,6 +235,7 @@@ static struct option builtin_merge_opti { OPTION_STRING, 'S', "gpg-sign", &sign_commit, N_("key-id"), N_("GPG sign commit"), PARSE_OPT_OPTARG, NULL, (intptr_t) "" }, OPT_BOOL(0, "overwrite-ignore", &overwrite_ignore, N_("update ignored files (default)")), + OPT_BOOL(0, "signoff", &signoff, N_("add Signed-off-by:")), OPT_END() }; @@@ -765,8 -764,6 +766,8 @@@ static void prepare_to_commit(struct co strbuf_addch(&msg, '\n'); if (0 < option_edit) strbuf_commented_addf(&msg, _(merge_editor_comment), comment_line_char); + if (signoff) + append_signoff(&msg, ignore_non_trailer(msg.buf, msg.len), 0); write_file_buf(git_path_merge_msg(), msg.buf, msg.len); if (run_commit_hook(0 < option_edit, get_index_file(), "prepare-commit-msg", git_path_merge_msg(), "merge", NULL)) diff --combined builtin/prune-packed.c index 8f41f7c20e,97bfde24ba..419238171d --- a/builtin/prune-packed.c +++ b/builtin/prune-packed.c @@@ -2,6 -2,7 +2,7 @@@ #include "cache.h" #include "progress.h" #include "parse-options.h" + #include "packfile.h" static const char * const prune_packed_usage[] = { N_("git prune-packed [-n | --dry-run] [-q | --quiet]"), @@@ -37,7 -38,8 +38,7 @@@ static int prune_object(const struct ob void prune_packed_objects(int opts) { if (opts & PRUNE_PACKED_VERBOSE) - progress = start_progress_delay(_("Removing duplicate objects"), - 256, 95, 2); + progress = start_delayed_progress(_("Removing duplicate objects"), 256); for_each_loose_file_in_objdir(get_object_directory(), prune_object, NULL, prune_subdir, &opts); diff --combined cache.h index bd8802af0e,2f09f8814a..a916bc79e3 --- a/cache.h +++ b/cache.h @@@ -684,8 -684,8 +684,8 @@@ extern int ie_modified(const struct ind #define HASH_WRITE_OBJECT 1 #define HASH_FORMAT_CHECK 2 -extern int index_fd(unsigned char *sha1, int fd, struct stat *st, enum object_type type, const char *path, unsigned flags); -extern int index_path(unsigned char *sha1, const char *path, struct stat *st, unsigned flags); +extern int index_fd(struct object_id *oid, int fd, struct stat *st, enum object_type type, const char *path, unsigned flags); +extern int index_path(struct object_id *oid, const char *path, struct stat *st, unsigned flags); /* * Record to sd the data from st that we use to check whether a file @@@ -902,20 -902,6 +902,6 @@@ extern void check_repository_format(voi */ extern const char *sha1_file_name(const unsigned char *sha1); - /* - * Return the name of the (local) packfile with the specified sha1 in - * its name. The return value is a pointer to memory that is - * overwritten each time this function is called. - */ - extern char *sha1_pack_name(const unsigned char *sha1); - - /* - * Return the name of the (local) pack index file with the specified - * sha1 in its name. The return value is a pointer to memory that is - * overwritten each time this function is called. - */ - extern char *sha1_pack_index_name(const unsigned char *sha1); - /* * Return an abbreviated sha1 unique within this repository's object database. * The result will be at least `len` characters long, and will be NUL @@@ -1192,7 -1178,7 +1178,7 @@@ static inline const unsigned char *look extern int sha1_object_info(const unsigned char *, unsigned long *); extern int hash_sha1_file(const void *buf, unsigned long len, const char *type, unsigned char *sha1); extern int write_sha1_file(const void *buf, unsigned long len, const char *type, unsigned char *return_sha1); -extern int hash_sha1_file_literally(const void *buf, unsigned long len, const char *type, unsigned char *sha1, unsigned flags); +extern int hash_sha1_file_literally(const void *buf, unsigned long len, const char *type, struct object_id *oid, unsigned flags); extern int pretend_sha1_file(void *, unsigned long, enum object_type, unsigned char *); extern int force_object_loose(const unsigned char *sha1, time_t mtime); extern int git_open_cloexec(const char *name, int flags); @@@ -1201,15 -1187,10 +1187,10 @@@ extern void *map_sha1_file(const unsign extern int unpack_sha1_header(git_zstream *stream, unsigned char *map, unsigned long mapsize, void *buffer, unsigned long bufsiz); extern int parse_sha1_header(const char *hdr, unsigned long *sizep); - /* global flag to enable extra checks when accessing packed objects */ - extern int do_check_packed_object_crc; - extern int check_sha1_signature(const unsigned char *sha1, void *buf, unsigned long size, const char *type); extern int finalize_object_file(const char *tmpfile, const char *filename); - extern int has_sha1_pack(const unsigned char *sha1); - /* * Open the loose object at path, check its sha1, and return the contents, * type, and size. If the object is a blob, then "contents" may return NULL, @@@ -1245,8 -1226,6 +1226,6 @@@ extern int has_object_file_with_flags(c */ extern int has_loose_object_nonlocal(const unsigned char *sha1); - extern int has_pack_index(const unsigned char *sha1); - extern void assert_sha1_type(const unsigned char *sha1, enum object_type expect); /* Helper to check and "touch" a file */ @@@ -1619,29 -1598,6 +1598,6 @@@ struct pack_entry struct packed_git *p; }; - extern struct packed_git *parse_pack_index(unsigned char *sha1, const char *idx_path); - - /* A hook to report invalid files in pack directory */ - #define PACKDIR_FILE_PACK 1 - #define PACKDIR_FILE_IDX 2 - #define PACKDIR_FILE_GARBAGE 4 - extern void (*report_garbage)(unsigned seen_bits, const char *path); - - extern void prepare_packed_git(void); - extern void reprepare_packed_git(void); - extern void install_packed_git(struct packed_git *pack); - - /* - * Give a rough count of objects in the repository. This sacrifices accuracy - * for speed. - */ - unsigned long approximate_object_count(void); - - extern struct packed_git *find_sha1_pack(const unsigned char *sha1, - struct packed_git *packs); - - extern void pack_report(void); - /* * Create a temporary file rooted in the object database directory, or * die on failure. The filename is taken from "pattern", which should have the @@@ -1650,15 -1606,6 +1606,6 @@@ */ extern int odb_mkstemp(struct strbuf *template, const char *pattern); - /* - * Generate the filename to be used for a pack file with checksum "sha1" and - * extension "ext". The result is written into the strbuf "buf", overwriting - * any existing contents. A pointer to buf->buf is returned as a convenience. - * - * Example: odb_pack_name(out, sha1, "idx") => ".git/objects/pack/pack-1234..idx" - */ - extern char *odb_pack_name(struct strbuf *buf, const unsigned char *sha1, const char *ext); - /* * Create a pack .keep file named "name" (which should generally be the output * of odb_pack_name). Returns a file descriptor opened for writing, or -1 on @@@ -1666,67 -1613,6 +1613,6 @@@ */ extern int odb_pack_keep(const char *name); - /* - * mmap the index file for the specified packfile (if it is not - * already mmapped). Return 0 on success. - */ - extern int open_pack_index(struct packed_git *); - - /* - * munmap the index file for the specified packfile (if it is - * currently mmapped). - */ - extern void close_pack_index(struct packed_git *); - - extern unsigned char *use_pack(struct packed_git *, struct pack_window **, off_t, unsigned long *); - extern void close_pack_windows(struct packed_git *); - extern void close_all_packs(void); - extern void unuse_pack(struct pack_window **); - extern void clear_delta_base_cache(void); - extern struct packed_git *add_packed_git(const char *path, size_t path_len, int local); - - /* - * Make sure that a pointer access into an mmap'd index file is within bounds, - * and can provide at least 8 bytes of data. - * - * Note that this is only necessary for variable-length segments of the file - * (like the 64-bit extended offset table), as we compare the size to the - * fixed-length parts when we open the file. - */ - extern void check_pack_index_ptr(const struct packed_git *p, const void *ptr); - - /* - * Return the SHA-1 of the nth object within the specified packfile. - * Open the index if it is not already open. The return value points - * at the SHA-1 within the mmapped index. Return NULL if there is an - * error. - */ - extern const unsigned char *nth_packed_object_sha1(struct packed_git *, uint32_t n); - /* - * Like nth_packed_object_sha1, but write the data into the object specified by - * the the first argument. Returns the first argument on success, and NULL on - * error. - */ - extern const struct object_id *nth_packed_object_oid(struct object_id *, struct packed_git *, uint32_t n); - - /* - * Return the offset of the nth object within the specified packfile. - * The index must already be opened. - */ - extern off_t nth_packed_object_offset(const struct packed_git *, uint32_t n); - - /* - * If the object named sha1 is present in the specified packfile, - * return its offset within the packfile; otherwise, return 0. - */ - extern off_t find_pack_entry_one(const unsigned char *sha1, struct packed_git *); - - extern int is_pack_valid(struct packed_git *); - extern void *unpack_entry(struct packed_git *, off_t, enum object_type *, unsigned long *); - extern unsigned long unpack_object_header_buffer(const unsigned char *buf, unsigned long len, enum object_type *type, unsigned long *sizep); - extern unsigned long get_size_from_delta(struct packed_git *, struct pack_window **, off_t); - extern int unpack_object_header(struct packed_git *, struct pack_window **, off_t *, unsigned long *); - /* * Iterate over the files in the loose-object parts of the object * directory "path", triggering the following callbacks: @@@ -1776,17 -1662,12 +1662,12 @@@ int for_each_loose_file_in_objdir_buf(s void *data); /* - * Iterate over loose and packed objects in both the local + * Iterate over loose objects in both the local * repository and any alternates repositories (unless the * LOCAL_ONLY flag is set). */ #define FOR_EACH_OBJECT_LOCAL_ONLY 0x1 - typedef int each_packed_object_fn(const struct object_id *oid, - struct packed_git *pack, - uint32_t pos, - void *data); extern int for_each_loose_object(each_loose_object_fn, void *, unsigned flags); - extern int for_each_packed_object(each_packed_object_fn, void *, unsigned flags); struct object_info { /* Request */ @@@ -1836,7 -1717,6 +1717,6 @@@ /* Do not retry packed storage after checking packed and loose storage */ #define OBJECT_INFO_QUICK 8 extern int sha1_object_info_extended(const unsigned char *, struct object_info *, unsigned flags); - extern int packed_object_info(struct packed_git *pack, off_t offset, struct object_info *); /* Dumb servers support */ extern int update_server_info(int); @@@ -1955,8 -1835,6 +1835,8 @@@ void shift_tree_by(const struct object_ #define WS_TRAILING_SPACE (WS_BLANK_AT_EOL|WS_BLANK_AT_EOF) #define WS_DEFAULT_RULE (WS_TRAILING_SPACE|WS_SPACE_BEFORE_TAB|8) #define WS_TAB_WIDTH_MASK 077 +/* All WS_* -- when extended, adapt diff.c emit_symbol */ +#define WS_RULE_MASK 07777 extern unsigned whitespace_rule_cfg; extern unsigned whitespace_rule(const char *); extern unsigned parse_whitespace_rule(const char *); diff --combined diff.c index a74cc08488,4c60990f70..3d3e553a98 --- a/diff.c +++ b/diff.c @@@ -16,11 -16,11 +16,12 @@@ #include "userdiff.h" #include "submodule-config.h" #include "submodule.h" +#include "hashmap.h" #include "ll-merge.h" #include "string-list.h" #include "argv-array.h" #include "graph.h" + #include "packfile.h" #ifdef NO_FAST_WORKING_DIRECTORY #define FAST_WORKING_DIRECTORY 0 @@@ -33,7 -33,6 +34,7 @@@ static int diff_indent_heuristic = 1 static int diff_rename_limit_default = 400; static int diff_suppress_blank_empty; static int diff_use_color_default = -1; +static int diff_color_moved_default; static int diff_context_default = 3; static int diff_interhunk_context_default; static const char *diff_word_regex_cfg; @@@ -58,14 -57,6 +59,14 @@@ static char diff_colors[][COLOR_MAXLEN GIT_COLOR_YELLOW, /* COMMIT */ GIT_COLOR_BG_RED, /* WHITESPACE */ GIT_COLOR_NORMAL, /* FUNCINFO */ + GIT_COLOR_BOLD_MAGENTA, /* OLD_MOVED */ + GIT_COLOR_BOLD_BLUE, /* OLD_MOVED ALTERNATIVE */ + GIT_COLOR_FAINT, /* OLD_MOVED_DIM */ + GIT_COLOR_FAINT_ITALIC, /* OLD_MOVED_ALTERNATIVE_DIM */ + GIT_COLOR_BOLD_CYAN, /* NEW_MOVED */ + GIT_COLOR_BOLD_YELLOW, /* NEW_MOVED ALTERNATIVE */ + GIT_COLOR_FAINT, /* NEW_MOVED_DIM */ + GIT_COLOR_FAINT_ITALIC, /* NEW_MOVED_ALTERNATIVE_DIM */ }; static NORETURN void die_want_option(const char *option_name) @@@ -91,22 -82,6 +92,22 @@@ static int parse_diff_color_slot(const return DIFF_WHITESPACE; if (!strcasecmp(var, "func")) return DIFF_FUNCINFO; + if (!strcasecmp(var, "oldmoved")) + return DIFF_FILE_OLD_MOVED; + if (!strcasecmp(var, "oldmovedalternative")) + return DIFF_FILE_OLD_MOVED_ALT; + if (!strcasecmp(var, "oldmoveddimmed")) + return DIFF_FILE_OLD_MOVED_DIM; + if (!strcasecmp(var, "oldmovedalternativedimmed")) + return DIFF_FILE_OLD_MOVED_ALT_DIM; + if (!strcasecmp(var, "newmoved")) + return DIFF_FILE_NEW_MOVED; + if (!strcasecmp(var, "newmovedalternative")) + return DIFF_FILE_NEW_MOVED_ALT; + if (!strcasecmp(var, "newmoveddimmed")) + return DIFF_FILE_NEW_MOVED_DIM; + if (!strcasecmp(var, "newmovedalternativedimmed")) + return DIFF_FILE_NEW_MOVED_ALT_DIM; return -1; } @@@ -255,44 -230,12 +256,44 @@@ int git_diff_heuristic_config(const cha return 0; } +static int parse_color_moved(const char *arg) +{ + switch (git_parse_maybe_bool(arg)) { + case 0: + return COLOR_MOVED_NO; + case 1: + return COLOR_MOVED_DEFAULT; + default: + break; + } + + if (!strcmp(arg, "no")) + return COLOR_MOVED_NO; + else if (!strcmp(arg, "plain")) + return COLOR_MOVED_PLAIN; + else if (!strcmp(arg, "zebra")) + return COLOR_MOVED_ZEBRA; + else if (!strcmp(arg, "default")) + return COLOR_MOVED_DEFAULT; + else if (!strcmp(arg, "dimmed_zebra")) + return COLOR_MOVED_ZEBRA_DIM; + else + return error(_("color moved setting must be one of 'no', 'default', 'zebra', 'dimmed_zebra', 'plain'")); +} + int git_diff_ui_config(const char *var, const char *value, void *cb) { if (!strcmp(var, "diff.color") || !strcmp(var, "color.diff")) { diff_use_color_default = git_config_colorbool(var, value); return 0; } + if (!strcmp(var, "diff.colormoved")) { + int cm = parse_color_moved(value); + if (cm < 0) + return -1; + diff_color_moved_default = cm; + return 0; + } if (!strcmp(var, "diff.context")) { diff_context_default = git_config_int(var, value); if (diff_context_default < 0) @@@ -401,6 -344,9 +402,6 @@@ int git_diff_basic_config(const char *v return 0; } - if (starts_with(var, "submodule.")) - return parse_submodule_config_option(var, value); - if (git_diff_heuristic_config(var, value, cb) < 0) return -1; @@@ -609,735 -555,68 +610,735 @@@ static void emit_line(struct diff_optio emit_line_0(o, set, reset, line[0], line+1, len-1); } -static int new_blank_line_at_eof(struct emit_callback *ecbdata, const char *line, int len) +enum diff_symbol { + DIFF_SYMBOL_BINARY_DIFF_HEADER, + DIFF_SYMBOL_BINARY_DIFF_HEADER_DELTA, + DIFF_SYMBOL_BINARY_DIFF_HEADER_LITERAL, + DIFF_SYMBOL_BINARY_DIFF_BODY, + DIFF_SYMBOL_BINARY_DIFF_FOOTER, + DIFF_SYMBOL_STATS_SUMMARY_NO_FILES, + DIFF_SYMBOL_STATS_SUMMARY_ABBREV, + DIFF_SYMBOL_STATS_SUMMARY_INSERTS_DELETES, + DIFF_SYMBOL_STATS_LINE, + DIFF_SYMBOL_WORD_DIFF, + DIFF_SYMBOL_STAT_SEP, + DIFF_SYMBOL_SUMMARY, + DIFF_SYMBOL_SUBMODULE_ADD, + DIFF_SYMBOL_SUBMODULE_DEL, + DIFF_SYMBOL_SUBMODULE_UNTRACKED, + DIFF_SYMBOL_SUBMODULE_MODIFIED, + DIFF_SYMBOL_SUBMODULE_HEADER, + DIFF_SYMBOL_SUBMODULE_ERROR, + DIFF_SYMBOL_SUBMODULE_PIPETHROUGH, + DIFF_SYMBOL_REWRITE_DIFF, + DIFF_SYMBOL_BINARY_FILES, + DIFF_SYMBOL_HEADER, + DIFF_SYMBOL_FILEPAIR_PLUS, + DIFF_SYMBOL_FILEPAIR_MINUS, + DIFF_SYMBOL_WORDS_PORCELAIN, + DIFF_SYMBOL_WORDS, + DIFF_SYMBOL_CONTEXT, + DIFF_SYMBOL_CONTEXT_INCOMPLETE, + DIFF_SYMBOL_PLUS, + DIFF_SYMBOL_MINUS, + DIFF_SYMBOL_NO_LF_EOF, + DIFF_SYMBOL_CONTEXT_FRAGINFO, + DIFF_SYMBOL_CONTEXT_MARKER, + DIFF_SYMBOL_SEPARATOR +}; +/* + * Flags for content lines: + * 0..12 are whitespace rules + * 13-15 are WSEH_NEW | WSEH_OLD | WSEH_CONTEXT + * 16 is marking if the line is blank at EOF + */ +#define DIFF_SYMBOL_CONTENT_BLANK_LINE_EOF (1<<16) +#define DIFF_SYMBOL_MOVED_LINE (1<<17) +#define DIFF_SYMBOL_MOVED_LINE_ALT (1<<18) +#define DIFF_SYMBOL_MOVED_LINE_UNINTERESTING (1<<19) +#define DIFF_SYMBOL_CONTENT_WS_MASK (WSEH_NEW | WSEH_OLD | WSEH_CONTEXT | WS_RULE_MASK) + +/* + * This struct is used when we need to buffer the output of the diff output. + * + * NEEDSWORK: Instead of storing a copy of the line, add an offset pointer + * into the pre/post image file. This pointer could be a union with the + * line pointer. By storing an offset into the file instead of the literal line, + * we can decrease the memory footprint for the buffered output. At first we + * may want to only have indirection for the content lines, but we could also + * enhance the state for emitting prefabricated lines, e.g. the similarity + * score line or hunk/file headers would only need to store a number or path + * and then the output can be constructed later on depending on state. + */ +struct emitted_diff_symbol { + const char *line; + int len; + int flags; + enum diff_symbol s; +}; +#define EMITTED_DIFF_SYMBOL_INIT {NULL} + +struct emitted_diff_symbols { + struct emitted_diff_symbol *buf; + int nr, alloc; +}; +#define EMITTED_DIFF_SYMBOLS_INIT {NULL, 0, 0} + +static void append_emitted_diff_symbol(struct diff_options *o, + struct emitted_diff_symbol *e) { - if (!((ecbdata->ws_rule & WS_BLANK_AT_EOF) && - ecbdata->blank_at_eof_in_preimage && - ecbdata->blank_at_eof_in_postimage && - ecbdata->blank_at_eof_in_preimage <= ecbdata->lno_in_preimage && - ecbdata->blank_at_eof_in_postimage <= ecbdata->lno_in_postimage)) - return 0; - return ws_blank_line(line, len, ecbdata->ws_rule); + struct emitted_diff_symbol *f; + + ALLOC_GROW(o->emitted_symbols->buf, + o->emitted_symbols->nr + 1, + o->emitted_symbols->alloc); + f = &o->emitted_symbols->buf[o->emitted_symbols->nr++]; + + memcpy(f, e, sizeof(struct emitted_diff_symbol)); + f->line = e->line ? xmemdupz(e->line, e->len) : NULL; } -static void emit_line_checked(const char *reset, - struct emit_callback *ecbdata, - const char *line, int len, - enum color_diff color, - unsigned ws_error_highlight, - char sign) +struct moved_entry { + struct hashmap_entry ent; + const struct emitted_diff_symbol *es; + struct moved_entry *next_line; +}; + +static int next_byte(const char **cp, const char **endp, + const struct diff_options *diffopt) +{ + int retval; + + if (*cp > *endp) + return -1; + + if (DIFF_XDL_TST(diffopt, IGNORE_WHITESPACE_CHANGE)) { + while (*cp < *endp && isspace(**cp)) + (*cp)++; + /* + * After skipping a couple of whitespaces, we still have to + * account for one space. + */ + return (int)' '; + } + + if (DIFF_XDL_TST(diffopt, IGNORE_WHITESPACE)) { + while (*cp < *endp && isspace(**cp)) + (*cp)++; + /* return the first non-ws character via the usual below */ + } + + retval = (unsigned char)(**cp); + (*cp)++; + return retval; +} + +static int moved_entry_cmp(const struct diff_options *diffopt, + const struct moved_entry *a, + const struct moved_entry *b, + const void *keydata) +{ + const char *ap = a->es->line, *ae = a->es->line + a->es->len; + const char *bp = b->es->line, *be = b->es->line + b->es->len; + + if (!(diffopt->xdl_opts & XDF_WHITESPACE_FLAGS)) + return a->es->len != b->es->len || memcmp(ap, bp, a->es->len); + + if (DIFF_XDL_TST(diffopt, IGNORE_WHITESPACE_AT_EOL)) { + while (ae > ap && isspace(*ae)) + ae--; + while (be > bp && isspace(*be)) + be--; + } + + while (1) { + int ca, cb; + ca = next_byte(&ap, &ae, diffopt); + cb = next_byte(&bp, &be, diffopt); + if (ca != cb) + return 1; + if (ca < 0) + return 0; + } +} + +static unsigned get_string_hash(struct emitted_diff_symbol *es, struct diff_options *o) +{ + if (o->xdl_opts & XDF_WHITESPACE_FLAGS) { + static struct strbuf sb = STRBUF_INIT; + const char *ap = es->line, *ae = es->line + es->len; + int c; + + strbuf_reset(&sb); + while (ae > ap && isspace(*ae)) + ae--; + while ((c = next_byte(&ap, &ae, o)) > 0) + strbuf_addch(&sb, c); + + return memhash(sb.buf, sb.len); + } else { + return memhash(es->line, es->len); + } +} + +static struct moved_entry *prepare_entry(struct diff_options *o, + int line_no) +{ + struct moved_entry *ret = xmalloc(sizeof(*ret)); + struct emitted_diff_symbol *l = &o->emitted_symbols->buf[line_no]; + + ret->ent.hash = get_string_hash(l, o); + ret->es = l; + ret->next_line = NULL; + + return ret; +} + +static void add_lines_to_move_detection(struct diff_options *o, + struct hashmap *add_lines, + struct hashmap *del_lines) +{ + struct moved_entry *prev_line = NULL; + + int n; + for (n = 0; n < o->emitted_symbols->nr; n++) { + struct hashmap *hm; + struct moved_entry *key; + + switch (o->emitted_symbols->buf[n].s) { + case DIFF_SYMBOL_PLUS: + hm = add_lines; + break; + case DIFF_SYMBOL_MINUS: + hm = del_lines; + break; + default: + prev_line = NULL; + continue; + } + + key = prepare_entry(o, n); + if (prev_line && prev_line->es->s == o->emitted_symbols->buf[n].s) + prev_line->next_line = key; + + hashmap_add(hm, key); + prev_line = key; + } +} + +static int shrink_potential_moved_blocks(struct moved_entry **pmb, + int pmb_nr) +{ + int lp, rp; + + /* Shrink the set of potential block to the remaining running */ + for (lp = 0, rp = pmb_nr - 1; lp <= rp;) { + while (lp < pmb_nr && pmb[lp]) + lp++; + /* lp points at the first NULL now */ + + while (rp > -1 && !pmb[rp]) + rp--; + /* rp points at the last non-NULL */ + + if (lp < pmb_nr && rp > -1 && lp < rp) { + pmb[lp] = pmb[rp]; + pmb[rp] = NULL; + rp--; + lp++; + } + } + + /* Remember the number of running sets */ + return rp + 1; +} + +/* + * If o->color_moved is COLOR_MOVED_PLAIN, this function does nothing. + * + * Otherwise, if the last block has fewer alphanumeric characters than + * COLOR_MOVED_MIN_ALNUM_COUNT, unset DIFF_SYMBOL_MOVED_LINE on all lines in + * that block. + * + * The last block consists of the (n - block_length)'th line up to but not + * including the nth line. + * + * NEEDSWORK: This uses the same heuristic as blame_entry_score() in blame.c. + * Think of a way to unify them. + */ +static void adjust_last_block(struct diff_options *o, int n, int block_length) +{ + int i, alnum_count = 0; + if (o->color_moved == COLOR_MOVED_PLAIN) + return; + for (i = 1; i < block_length + 1; i++) { + const char *c = o->emitted_symbols->buf[n - i].line; + for (; *c; c++) { + if (!isalnum(*c)) + continue; + alnum_count++; + if (alnum_count >= COLOR_MOVED_MIN_ALNUM_COUNT) + return; + } + } + for (i = 1; i < block_length + 1; i++) + o->emitted_symbols->buf[n - i].flags &= ~DIFF_SYMBOL_MOVED_LINE; +} + +/* Find blocks of moved code, delegate actual coloring decision to helper */ +static void mark_color_as_moved(struct diff_options *o, + struct hashmap *add_lines, + struct hashmap *del_lines) +{ + struct moved_entry **pmb = NULL; /* potentially moved blocks */ + int pmb_nr = 0, pmb_alloc = 0; + int n, flipped_block = 1, block_length = 0; + + + for (n = 0; n < o->emitted_symbols->nr; n++) { + struct hashmap *hm = NULL; + struct moved_entry *key; + struct moved_entry *match = NULL; + struct emitted_diff_symbol *l = &o->emitted_symbols->buf[n]; + int i; + + switch (l->s) { + case DIFF_SYMBOL_PLUS: + hm = del_lines; + key = prepare_entry(o, n); + match = hashmap_get(hm, key, o); + free(key); + break; + case DIFF_SYMBOL_MINUS: + hm = add_lines; + key = prepare_entry(o, n); + match = hashmap_get(hm, key, o); + free(key); + break; + default: + flipped_block = 1; + } + + if (!match) { + adjust_last_block(o, n, block_length); + pmb_nr = 0; + block_length = 0; + continue; + } + + l->flags |= DIFF_SYMBOL_MOVED_LINE; + + if (o->color_moved == COLOR_MOVED_PLAIN) + continue; + + /* Check any potential block runs, advance each or nullify */ + for (i = 0; i < pmb_nr; i++) { + struct moved_entry *p = pmb[i]; + struct moved_entry *pnext = (p && p->next_line) ? + p->next_line : NULL; + if (pnext && !hm->cmpfn(o, pnext, match, NULL)) { + pmb[i] = p->next_line; + } else { + pmb[i] = NULL; + } + } + + pmb_nr = shrink_potential_moved_blocks(pmb, pmb_nr); + + if (pmb_nr == 0) { + /* + * The current line is the start of a new block. + * Setup the set of potential blocks. + */ + for (; match; match = hashmap_get_next(hm, match)) { + ALLOC_GROW(pmb, pmb_nr + 1, pmb_alloc); + pmb[pmb_nr++] = match; + } + + flipped_block = (flipped_block + 1) % 2; + + adjust_last_block(o, n, block_length); + block_length = 0; + } + + block_length++; + + if (flipped_block) + l->flags |= DIFF_SYMBOL_MOVED_LINE_ALT; + } + adjust_last_block(o, n, block_length); + + free(pmb); +} + +#define DIFF_SYMBOL_MOVED_LINE_ZEBRA_MASK \ + (DIFF_SYMBOL_MOVED_LINE | DIFF_SYMBOL_MOVED_LINE_ALT) +static void dim_moved_lines(struct diff_options *o) +{ + int n; + for (n = 0; n < o->emitted_symbols->nr; n++) { + struct emitted_diff_symbol *prev = (n != 0) ? + &o->emitted_symbols->buf[n - 1] : NULL; + struct emitted_diff_symbol *l = &o->emitted_symbols->buf[n]; + struct emitted_diff_symbol *next = + (n < o->emitted_symbols->nr - 1) ? + &o->emitted_symbols->buf[n + 1] : NULL; + + /* Not a plus or minus line? */ + if (l->s != DIFF_SYMBOL_PLUS && l->s != DIFF_SYMBOL_MINUS) + continue; + + /* Not a moved line? */ + if (!(l->flags & DIFF_SYMBOL_MOVED_LINE)) + continue; + + /* + * If prev or next are not a plus or minus line, + * pretend they don't exist + */ + if (prev && prev->s != DIFF_SYMBOL_PLUS && + prev->s != DIFF_SYMBOL_MINUS) + prev = NULL; + if (next && next->s != DIFF_SYMBOL_PLUS && + next->s != DIFF_SYMBOL_MINUS) + next = NULL; + + /* Inside a block? */ + if ((prev && + (prev->flags & DIFF_SYMBOL_MOVED_LINE_ZEBRA_MASK) == + (l->flags & DIFF_SYMBOL_MOVED_LINE_ZEBRA_MASK)) && + (next && + (next->flags & DIFF_SYMBOL_MOVED_LINE_ZEBRA_MASK) == + (l->flags & DIFF_SYMBOL_MOVED_LINE_ZEBRA_MASK))) { + l->flags |= DIFF_SYMBOL_MOVED_LINE_UNINTERESTING; + continue; + } + + /* Check if we are at an interesting bound: */ + if (prev && (prev->flags & DIFF_SYMBOL_MOVED_LINE) && + (prev->flags & DIFF_SYMBOL_MOVED_LINE_ALT) != + (l->flags & DIFF_SYMBOL_MOVED_LINE_ALT)) + continue; + if (next && (next->flags & DIFF_SYMBOL_MOVED_LINE) && + (next->flags & DIFF_SYMBOL_MOVED_LINE_ALT) != + (l->flags & DIFF_SYMBOL_MOVED_LINE_ALT)) + continue; + + /* + * The boundary to prev and next are not interesting, + * so this line is not interesting as a whole + */ + l->flags |= DIFF_SYMBOL_MOVED_LINE_UNINTERESTING; + } +} + +static void emit_line_ws_markup(struct diff_options *o, + const char *set, const char *reset, + const char *line, int len, char sign, + unsigned ws_rule, int blank_at_eof) { - const char *set = diff_get_color(ecbdata->color_diff, color); const char *ws = NULL; - if (ecbdata->opt->ws_error_highlight & ws_error_highlight) { - ws = diff_get_color(ecbdata->color_diff, DIFF_WHITESPACE); + if (o->ws_error_highlight & ws_rule) { + ws = diff_get_color_opt(o, DIFF_WHITESPACE); if (!*ws) ws = NULL; } if (!ws) - emit_line_0(ecbdata->opt, set, reset, sign, line, len); - else if (sign == '+' && new_blank_line_at_eof(ecbdata, line, len)) + emit_line_0(o, set, reset, sign, line, len); + else if (blank_at_eof) /* Blank line at EOF - paint '+' as well */ - emit_line_0(ecbdata->opt, ws, reset, sign, line, len); + emit_line_0(o, ws, reset, sign, line, len); else { /* Emit just the prefix, then the rest. */ - emit_line_0(ecbdata->opt, set, reset, sign, "", 0); - ws_check_emit(line, len, ecbdata->ws_rule, - ecbdata->opt->file, set, reset, ws); + emit_line_0(o, set, reset, sign, "", 0); + ws_check_emit(line, len, ws_rule, + o->file, set, reset, ws); + } +} + +static void emit_diff_symbol_from_struct(struct diff_options *o, + struct emitted_diff_symbol *eds) +{ + static const char *nneof = " No newline at end of file\n"; + const char *context, *reset, *set, *meta, *fraginfo; + struct strbuf sb = STRBUF_INIT; + + enum diff_symbol s = eds->s; + const char *line = eds->line; + int len = eds->len; + unsigned flags = eds->flags; + + switch (s) { + case DIFF_SYMBOL_NO_LF_EOF: + context = diff_get_color_opt(o, DIFF_CONTEXT); + reset = diff_get_color_opt(o, DIFF_RESET); + putc('\n', o->file); + emit_line_0(o, context, reset, '\\', + nneof, strlen(nneof)); + break; + case DIFF_SYMBOL_SUBMODULE_HEADER: + case DIFF_SYMBOL_SUBMODULE_ERROR: + case DIFF_SYMBOL_SUBMODULE_PIPETHROUGH: + case DIFF_SYMBOL_STATS_SUMMARY_INSERTS_DELETES: + case DIFF_SYMBOL_SUMMARY: + case DIFF_SYMBOL_STATS_LINE: + case DIFF_SYMBOL_BINARY_DIFF_BODY: + case DIFF_SYMBOL_CONTEXT_FRAGINFO: + emit_line(o, "", "", line, len); + break; + case DIFF_SYMBOL_CONTEXT_INCOMPLETE: + case DIFF_SYMBOL_CONTEXT_MARKER: + context = diff_get_color_opt(o, DIFF_CONTEXT); + reset = diff_get_color_opt(o, DIFF_RESET); + emit_line(o, context, reset, line, len); + break; + case DIFF_SYMBOL_SEPARATOR: + fprintf(o->file, "%s%c", + diff_line_prefix(o), + o->line_termination); + break; + case DIFF_SYMBOL_CONTEXT: + set = diff_get_color_opt(o, DIFF_CONTEXT); + reset = diff_get_color_opt(o, DIFF_RESET); + emit_line_ws_markup(o, set, reset, line, len, ' ', + flags & (DIFF_SYMBOL_CONTENT_WS_MASK), 0); + break; + case DIFF_SYMBOL_PLUS: + switch (flags & (DIFF_SYMBOL_MOVED_LINE | + DIFF_SYMBOL_MOVED_LINE_ALT | + DIFF_SYMBOL_MOVED_LINE_UNINTERESTING)) { + case DIFF_SYMBOL_MOVED_LINE | + DIFF_SYMBOL_MOVED_LINE_ALT | + DIFF_SYMBOL_MOVED_LINE_UNINTERESTING: + set = diff_get_color_opt(o, DIFF_FILE_NEW_MOVED_ALT_DIM); + break; + case DIFF_SYMBOL_MOVED_LINE | + DIFF_SYMBOL_MOVED_LINE_ALT: + set = diff_get_color_opt(o, DIFF_FILE_NEW_MOVED_ALT); + break; + case DIFF_SYMBOL_MOVED_LINE | + DIFF_SYMBOL_MOVED_LINE_UNINTERESTING: + set = diff_get_color_opt(o, DIFF_FILE_NEW_MOVED_DIM); + break; + case DIFF_SYMBOL_MOVED_LINE: + set = diff_get_color_opt(o, DIFF_FILE_NEW_MOVED); + break; + default: + set = diff_get_color_opt(o, DIFF_FILE_NEW); + } + reset = diff_get_color_opt(o, DIFF_RESET); + emit_line_ws_markup(o, set, reset, line, len, '+', + flags & DIFF_SYMBOL_CONTENT_WS_MASK, + flags & DIFF_SYMBOL_CONTENT_BLANK_LINE_EOF); + break; + case DIFF_SYMBOL_MINUS: + switch (flags & (DIFF_SYMBOL_MOVED_LINE | + DIFF_SYMBOL_MOVED_LINE_ALT | + DIFF_SYMBOL_MOVED_LINE_UNINTERESTING)) { + case DIFF_SYMBOL_MOVED_LINE | + DIFF_SYMBOL_MOVED_LINE_ALT | + DIFF_SYMBOL_MOVED_LINE_UNINTERESTING: + set = diff_get_color_opt(o, DIFF_FILE_OLD_MOVED_ALT_DIM); + break; + case DIFF_SYMBOL_MOVED_LINE | + DIFF_SYMBOL_MOVED_LINE_ALT: + set = diff_get_color_opt(o, DIFF_FILE_OLD_MOVED_ALT); + break; + case DIFF_SYMBOL_MOVED_LINE | + DIFF_SYMBOL_MOVED_LINE_UNINTERESTING: + set = diff_get_color_opt(o, DIFF_FILE_OLD_MOVED_DIM); + break; + case DIFF_SYMBOL_MOVED_LINE: + set = diff_get_color_opt(o, DIFF_FILE_OLD_MOVED); + break; + default: + set = diff_get_color_opt(o, DIFF_FILE_OLD); + } + reset = diff_get_color_opt(o, DIFF_RESET); + emit_line_ws_markup(o, set, reset, line, len, '-', + flags & DIFF_SYMBOL_CONTENT_WS_MASK, 0); + break; + case DIFF_SYMBOL_WORDS_PORCELAIN: + context = diff_get_color_opt(o, DIFF_CONTEXT); + reset = diff_get_color_opt(o, DIFF_RESET); + emit_line(o, context, reset, line, len); + fputs("~\n", o->file); + break; + case DIFF_SYMBOL_WORDS: + context = diff_get_color_opt(o, DIFF_CONTEXT); + reset = diff_get_color_opt(o, DIFF_RESET); + /* + * Skip the prefix character, if any. With + * diff_suppress_blank_empty, there may be + * none. + */ + if (line[0] != '\n') { + line++; + len--; + } + emit_line(o, context, reset, line, len); + break; + case DIFF_SYMBOL_FILEPAIR_PLUS: + meta = diff_get_color_opt(o, DIFF_METAINFO); + reset = diff_get_color_opt(o, DIFF_RESET); + fprintf(o->file, "%s%s+++ %s%s%s\n", diff_line_prefix(o), meta, + line, reset, + strchr(line, ' ') ? "\t" : ""); + break; + case DIFF_SYMBOL_FILEPAIR_MINUS: + meta = diff_get_color_opt(o, DIFF_METAINFO); + reset = diff_get_color_opt(o, DIFF_RESET); + fprintf(o->file, "%s%s--- %s%s%s\n", diff_line_prefix(o), meta, + line, reset, + strchr(line, ' ') ? "\t" : ""); + break; + case DIFF_SYMBOL_BINARY_FILES: + case DIFF_SYMBOL_HEADER: + fprintf(o->file, "%s", line); + break; + case DIFF_SYMBOL_BINARY_DIFF_HEADER: + fprintf(o->file, "%sGIT binary patch\n", diff_line_prefix(o)); + break; + case DIFF_SYMBOL_BINARY_DIFF_HEADER_DELTA: + fprintf(o->file, "%sdelta %s\n", diff_line_prefix(o), line); + break; + case DIFF_SYMBOL_BINARY_DIFF_HEADER_LITERAL: + fprintf(o->file, "%sliteral %s\n", diff_line_prefix(o), line); + break; + case DIFF_SYMBOL_BINARY_DIFF_FOOTER: + fputs(diff_line_prefix(o), o->file); + fputc('\n', o->file); + break; + case DIFF_SYMBOL_REWRITE_DIFF: + fraginfo = diff_get_color(o->use_color, DIFF_FRAGINFO); + reset = diff_get_color_opt(o, DIFF_RESET); + emit_line(o, fraginfo, reset, line, len); + break; + case DIFF_SYMBOL_SUBMODULE_ADD: + set = diff_get_color_opt(o, DIFF_FILE_NEW); + reset = diff_get_color_opt(o, DIFF_RESET); + emit_line(o, set, reset, line, len); + break; + case DIFF_SYMBOL_SUBMODULE_DEL: + set = diff_get_color_opt(o, DIFF_FILE_OLD); + reset = diff_get_color_opt(o, DIFF_RESET); + emit_line(o, set, reset, line, len); + break; + case DIFF_SYMBOL_SUBMODULE_UNTRACKED: + fprintf(o->file, "%sSubmodule %s contains untracked content\n", + diff_line_prefix(o), line); + break; + case DIFF_SYMBOL_SUBMODULE_MODIFIED: + fprintf(o->file, "%sSubmodule %s contains modified content\n", + diff_line_prefix(o), line); + break; + case DIFF_SYMBOL_STATS_SUMMARY_NO_FILES: + emit_line(o, "", "", " 0 files changed\n", + strlen(" 0 files changed\n")); + break; + case DIFF_SYMBOL_STATS_SUMMARY_ABBREV: + emit_line(o, "", "", " ...\n", strlen(" ...\n")); + break; + case DIFF_SYMBOL_WORD_DIFF: + fprintf(o->file, "%.*s", len, line); + break; + case DIFF_SYMBOL_STAT_SEP: + fputs(o->stat_sep, o->file); + break; + default: + die("BUG: unknown diff symbol"); } + strbuf_release(&sb); +} + +static void emit_diff_symbol(struct diff_options *o, enum diff_symbol s, + const char *line, int len, unsigned flags) +{ + struct emitted_diff_symbol e = {line, len, flags, s}; + + if (o->emitted_symbols) + append_emitted_diff_symbol(o, &e); + else + emit_diff_symbol_from_struct(o, &e); +} + +void diff_emit_submodule_del(struct diff_options *o, const char *line) +{ + emit_diff_symbol(o, DIFF_SYMBOL_SUBMODULE_DEL, line, strlen(line), 0); +} + +void diff_emit_submodule_add(struct diff_options *o, const char *line) +{ + emit_diff_symbol(o, DIFF_SYMBOL_SUBMODULE_ADD, line, strlen(line), 0); +} + +void diff_emit_submodule_untracked(struct diff_options *o, const char *path) +{ + emit_diff_symbol(o, DIFF_SYMBOL_SUBMODULE_UNTRACKED, + path, strlen(path), 0); +} + +void diff_emit_submodule_modified(struct diff_options *o, const char *path) +{ + emit_diff_symbol(o, DIFF_SYMBOL_SUBMODULE_MODIFIED, + path, strlen(path), 0); +} + +void diff_emit_submodule_header(struct diff_options *o, const char *header) +{ + emit_diff_symbol(o, DIFF_SYMBOL_SUBMODULE_HEADER, + header, strlen(header), 0); +} + +void diff_emit_submodule_error(struct diff_options *o, const char *err) +{ + emit_diff_symbol(o, DIFF_SYMBOL_SUBMODULE_ERROR, err, strlen(err), 0); +} + +void diff_emit_submodule_pipethrough(struct diff_options *o, + const char *line, int len) +{ + emit_diff_symbol(o, DIFF_SYMBOL_SUBMODULE_PIPETHROUGH, line, len, 0); +} + +static int new_blank_line_at_eof(struct emit_callback *ecbdata, const char *line, int len) +{ + if (!((ecbdata->ws_rule & WS_BLANK_AT_EOF) && + ecbdata->blank_at_eof_in_preimage && + ecbdata->blank_at_eof_in_postimage && + ecbdata->blank_at_eof_in_preimage <= ecbdata->lno_in_preimage && + ecbdata->blank_at_eof_in_postimage <= ecbdata->lno_in_postimage)) + return 0; + return ws_blank_line(line, len, ecbdata->ws_rule); } static void emit_add_line(const char *reset, struct emit_callback *ecbdata, const char *line, int len) { - emit_line_checked(reset, ecbdata, line, len, - DIFF_FILE_NEW, WSEH_NEW, '+'); + unsigned flags = WSEH_NEW | ecbdata->ws_rule; + if (new_blank_line_at_eof(ecbdata, line, len)) + flags |= DIFF_SYMBOL_CONTENT_BLANK_LINE_EOF; + + emit_diff_symbol(ecbdata->opt, DIFF_SYMBOL_PLUS, line, len, flags); } static void emit_del_line(const char *reset, struct emit_callback *ecbdata, const char *line, int len) { - emit_line_checked(reset, ecbdata, line, len, - DIFF_FILE_OLD, WSEH_OLD, '-'); + unsigned flags = WSEH_OLD | ecbdata->ws_rule; + emit_diff_symbol(ecbdata->opt, DIFF_SYMBOL_MINUS, line, len, flags); } static void emit_context_line(const char *reset, struct emit_callback *ecbdata, const char *line, int len) { - emit_line_checked(reset, ecbdata, line, len, - DIFF_CONTEXT, WSEH_CONTEXT, ' '); + unsigned flags = WSEH_CONTEXT | ecbdata->ws_rule; + emit_diff_symbol(ecbdata->opt, DIFF_SYMBOL_CONTEXT, line, len, flags); } static void emit_hunk_header(struct emit_callback *ecbdata, @@@ -1360,8 -639,7 +1361,8 @@@ if (len < 10 || memcmp(line, atat, 2) || !(ep = memmem(line + 2, len - 2, atat, 2))) { - emit_line(ecbdata->opt, context, reset, line, len); + emit_diff_symbol(ecbdata->opt, + DIFF_SYMBOL_CONTEXT_MARKER, line, len, 0); return; } ep += 2; /* skip over @@ */ @@@ -1395,9 -673,7 +1396,9 @@@ } strbuf_add(&msgbuf, line + len, org_len - len); - emit_line(ecbdata->opt, "", "", msgbuf.buf, msgbuf.len); + strbuf_complete_line(&msgbuf); + emit_diff_symbol(ecbdata->opt, + DIFF_SYMBOL_CONTEXT_FRAGINFO, msgbuf.buf, msgbuf.len, 0); strbuf_release(&msgbuf); } @@@ -1419,17 -695,17 +1420,17 @@@ static void remove_tempfile(void } } -static void print_line_count(FILE *file, int count) +static void add_line_count(struct strbuf *out, int count) { switch (count) { case 0: - fprintf(file, "0,0"); + strbuf_addstr(out, "0,0"); break; case 1: - fprintf(file, "1"); + strbuf_addstr(out, "1"); break; default: - fprintf(file, "1,%d", count); + strbuf_addf(out, "1,%d", count); break; } } @@@ -1438,6 -714,7 +1439,6 @@@ static void emit_rewrite_lines(struct e int prefix, const char *data, int size) { const char *endp = NULL; - static const char *nneof = " No newline at end of file\n"; const char *reset = diff_get_color(ecb->color_diff, DIFF_RESET); while (0 < size) { @@@ -1455,8 -732,13 +1456,8 @@@ size -= len; data += len; } - if (!endp) { - const char *context = diff_get_color(ecb->color_diff, - DIFF_CONTEXT); - putc('\n', ecb->opt->file); - emit_line_0(ecb->opt, context, reset, '\\', - nneof, strlen(nneof)); - } + if (!endp) + emit_diff_symbol(ecb->opt, DIFF_SYMBOL_NO_LF_EOF, NULL, 0, 0); } static void emit_rewrite_diff(const char *name_a, @@@ -1468,12 -750,16 +1469,12 @@@ struct diff_options *o) { int lc_a, lc_b; - const char *name_a_tab, *name_b_tab; - const char *metainfo = diff_get_color(o->use_color, DIFF_METAINFO); - const char *fraginfo = diff_get_color(o->use_color, DIFF_FRAGINFO); - const char *reset = diff_get_color(o->use_color, DIFF_RESET); static struct strbuf a_name = STRBUF_INIT, b_name = STRBUF_INIT; const char *a_prefix, *b_prefix; char *data_one, *data_two; size_t size_one, size_two; struct emit_callback ecbdata; - const char *line_prefix = diff_line_prefix(o); + struct strbuf out = STRBUF_INIT; if (diff_mnemonic_prefix && DIFF_OPT_TST(o, REVERSE_DIFF)) { a_prefix = o->b_prefix; @@@ -1485,6 -771,8 +1486,6 @@@ name_a += (*name_a == '/'); name_b += (*name_b == '/'); - name_a_tab = strchr(name_a, ' ') ? "\t" : ""; - name_b_tab = strchr(name_b, ' ') ? "\t" : ""; strbuf_reset(&a_name); strbuf_reset(&b_name); @@@ -1511,23 -799,18 +1512,23 @@@ lc_a = count_lines(data_one, size_one); lc_b = count_lines(data_two, size_two); - fprintf(o->file, - "%s%s--- %s%s%s\n%s%s+++ %s%s%s\n%s%s@@ -", - line_prefix, metainfo, a_name.buf, name_a_tab, reset, - line_prefix, metainfo, b_name.buf, name_b_tab, reset, - line_prefix, fraginfo); + + emit_diff_symbol(o, DIFF_SYMBOL_FILEPAIR_MINUS, + a_name.buf, a_name.len, 0); + emit_diff_symbol(o, DIFF_SYMBOL_FILEPAIR_PLUS, + b_name.buf, b_name.len, 0); + + strbuf_addstr(&out, "@@ -"); if (!o->irreversible_delete) - print_line_count(o->file, lc_a); + add_line_count(&out, lc_a); else - fprintf(o->file, "?,?"); - fprintf(o->file, " +"); - print_line_count(o->file, lc_b); - fprintf(o->file, " @@%s\n", reset); + strbuf_addstr(&out, "?,?"); + strbuf_addstr(&out, " +"); + add_line_count(&out, lc_b); + strbuf_addstr(&out, " @@\n"); + emit_diff_symbol(o, DIFF_SYMBOL_REWRITE_DIFF, out.buf, out.len, 0); + strbuf_release(&out); + if (lc_a && !o->irreversible_delete) emit_rewrite_lines(&ecbdata, '-', data_one, size_one); if (lc_b) @@@ -1587,49 -870,37 +1588,49 @@@ struct diff_words_data struct diff_words_style *style; }; -static int fn_out_diff_words_write_helper(FILE *fp, +static int fn_out_diff_words_write_helper(struct diff_options *o, struct diff_words_style_elem *st_el, const char *newline, - size_t count, const char *buf, - const char *line_prefix) + size_t count, const char *buf) { int print = 0; + struct strbuf sb = STRBUF_INIT; while (count) { char *p = memchr(buf, '\n', count); if (print) - fputs(line_prefix, fp); + strbuf_addstr(&sb, diff_line_prefix(o)); + if (p != buf) { - if (st_el->color && fputs(st_el->color, fp) < 0) - return -1; - if (fputs(st_el->prefix, fp) < 0 || - fwrite(buf, p ? p - buf : count, 1, fp) != 1 || - fputs(st_el->suffix, fp) < 0) - return -1; - if (st_el->color && *st_el->color - && fputs(GIT_COLOR_RESET, fp) < 0) - return -1; + const char *reset = st_el->color && *st_el->color ? + GIT_COLOR_RESET : NULL; + if (st_el->color && *st_el->color) + strbuf_addstr(&sb, st_el->color); + strbuf_addstr(&sb, st_el->prefix); + strbuf_add(&sb, buf, p ? p - buf : count); + strbuf_addstr(&sb, st_el->suffix); + if (reset) + strbuf_addstr(&sb, reset); } if (!p) - return 0; - if (fputs(newline, fp) < 0) - return -1; + goto out; + + strbuf_addstr(&sb, newline); count -= p + 1 - buf; buf = p + 1; print = 1; + if (count) { + emit_diff_symbol(o, DIFF_SYMBOL_WORD_DIFF, + sb.buf, sb.len, 0); + strbuf_reset(&sb); + } } + +out: + if (sb.len) + emit_diff_symbol(o, DIFF_SYMBOL_WORD_DIFF, + sb.buf, sb.len, 0); + strbuf_release(&sb); return 0; } @@@ -1711,20 -982,24 +1712,20 @@@ static void fn_out_diff_words_aux(void fputs(line_prefix, diff_words->opt->file); } if (diff_words->current_plus != plus_begin) { - fn_out_diff_words_write_helper(diff_words->opt->file, + fn_out_diff_words_write_helper(diff_words->opt, &style->ctx, style->newline, plus_begin - diff_words->current_plus, - diff_words->current_plus, line_prefix); - if (*(plus_begin - 1) == '\n') - fputs(line_prefix, diff_words->opt->file); + diff_words->current_plus); } if (minus_begin != minus_end) { - fn_out_diff_words_write_helper(diff_words->opt->file, + fn_out_diff_words_write_helper(diff_words->opt, &style->old, style->newline, - minus_end - minus_begin, minus_begin, - line_prefix); + minus_end - minus_begin, minus_begin); } if (plus_begin != plus_end) { - fn_out_diff_words_write_helper(diff_words->opt->file, + fn_out_diff_words_write_helper(diff_words->opt, &style->new, style->newline, - plus_end - plus_begin, plus_begin, - line_prefix); + plus_end - plus_begin, plus_begin); } diff_words->current_plus = plus_end; @@@ -1818,12 -1093,11 +1819,12 @@@ static void diff_words_show(struct diff /* special case: only removal */ if (!diff_words->plus.text.size) { - fputs(line_prefix, diff_words->opt->file); - fn_out_diff_words_write_helper(diff_words->opt->file, + emit_diff_symbol(diff_words->opt, DIFF_SYMBOL_WORD_DIFF, + line_prefix, strlen(line_prefix), 0); + fn_out_diff_words_write_helper(diff_words->opt, &style->old, style->newline, diff_words->minus.text.size, - diff_words->minus.text.ptr, line_prefix); + diff_words->minus.text.ptr); diff_words->minus.text.size = 0; return; } @@@ -1846,12 -1120,12 +1847,12 @@@ if (diff_words->current_plus != diff_words->plus.text.ptr + diff_words->plus.text.size) { if (color_words_output_graph_prefix(diff_words)) - fputs(line_prefix, diff_words->opt->file); - fn_out_diff_words_write_helper(diff_words->opt->file, + emit_diff_symbol(diff_words->opt, DIFF_SYMBOL_WORD_DIFF, + line_prefix, strlen(line_prefix), 0); + fn_out_diff_words_write_helper(diff_words->opt, &style->ctx, style->newline, diff_words->plus.text.ptr + diff_words->plus.text.size - - diff_words->current_plus, diff_words->current_plus, - line_prefix); + - diff_words->current_plus, diff_words->current_plus); } diff_words->minus.text.size = diff_words->plus.text.size = 0; } @@@ -1859,29 -1133,9 +1860,29 @@@ /* In "color-words" mode, show word-diff of words accumulated in the buffer */ static void diff_words_flush(struct emit_callback *ecbdata) { + struct diff_options *wo = ecbdata->diff_words->opt; + if (ecbdata->diff_words->minus.text.size || ecbdata->diff_words->plus.text.size) diff_words_show(ecbdata->diff_words); + + if (wo->emitted_symbols) { + struct diff_options *o = ecbdata->opt; + struct emitted_diff_symbols *wol = wo->emitted_symbols; + int i; + + /* + * NEEDSWORK: + * Instead of appending each, concat all words to a line? + */ + for (i = 0; i < wol->nr; i++) + append_emitted_diff_symbol(o, &wol->buf[i]); + + for (i = 0; i < wol->nr; i++) + free((void *)wol->buf[i].line); + + wol->nr = 0; + } } static void diff_filespec_load_driver(struct diff_filespec *one) @@@ -1917,11 -1171,6 +1918,11 @@@ static void init_diff_words_data(struc xcalloc(1, sizeof(struct diff_words_data)); ecbdata->diff_words->type = o->word_diff; ecbdata->diff_words->opt = o; + + if (orig_opts->emitted_symbols) + o->emitted_symbols = + xcalloc(1, sizeof(struct emitted_diff_symbols)); + if (!o->word_regex) o->word_regex = userdiff_word_regex(one); if (!o->word_regex) @@@ -1956,7 -1205,6 +1957,7 @@@ static void free_diff_words_data(struc { if (ecbdata->diff_words) { diff_words_flush(ecbdata); + free (ecbdata->diff_words->opt->emitted_symbols); free (ecbdata->diff_words->opt); free (ecbdata->diff_words->minus.text.ptr); free (ecbdata->diff_words->minus.orig); @@@ -2021,25 -1269,30 +2022,25 @@@ static void find_lno(const char *line, static void fn_out_consume(void *priv, char *line, unsigned long len) { struct emit_callback *ecbdata = priv; - const char *meta = diff_get_color(ecbdata->color_diff, DIFF_METAINFO); - const char *context = diff_get_color(ecbdata->color_diff, DIFF_CONTEXT); const char *reset = diff_get_color(ecbdata->color_diff, DIFF_RESET); struct diff_options *o = ecbdata->opt; - const char *line_prefix = diff_line_prefix(o); o->found_changes = 1; if (ecbdata->header) { - fprintf(o->file, "%s", ecbdata->header->buf); + emit_diff_symbol(o, DIFF_SYMBOL_HEADER, + ecbdata->header->buf, ecbdata->header->len, 0); strbuf_reset(ecbdata->header); ecbdata->header = NULL; } if (ecbdata->label_path[0]) { - const char *name_a_tab, *name_b_tab; - - name_a_tab = strchr(ecbdata->label_path[0], ' ') ? "\t" : ""; - name_b_tab = strchr(ecbdata->label_path[1], ' ') ? "\t" : ""; - - fprintf(o->file, "%s%s--- %s%s%s\n", - line_prefix, meta, ecbdata->label_path[0], reset, name_a_tab); - fprintf(o->file, "%s%s+++ %s%s%s\n", - line_prefix, meta, ecbdata->label_path[1], reset, name_b_tab); + emit_diff_symbol(o, DIFF_SYMBOL_FILEPAIR_MINUS, + ecbdata->label_path[0], + strlen(ecbdata->label_path[0]), 0); + emit_diff_symbol(o, DIFF_SYMBOL_FILEPAIR_PLUS, + ecbdata->label_path[1], + strlen(ecbdata->label_path[1]), 0); ecbdata->label_path[0] = ecbdata->label_path[1] = NULL; } @@@ -2055,13 -1308,12 +2056,13 @@@ len = sane_truncate_line(ecbdata, line, len); find_lno(line, ecbdata); emit_hunk_header(ecbdata, line, len); - if (line[len-1] != '\n') - putc('\n', o->file); return; } if (ecbdata->diff_words) { + enum diff_symbol s = + ecbdata->diff_words->type == DIFF_WORDS_PORCELAIN ? + DIFF_SYMBOL_WORDS_PORCELAIN : DIFF_SYMBOL_WORDS; if (line[0] == '-') { diff_words_append(line, len, &ecbdata->diff_words->minus); @@@ -2081,7 -1333,21 +2082,7 @@@ return; } diff_words_flush(ecbdata); - if (ecbdata->diff_words->type == DIFF_WORDS_PORCELAIN) { - emit_line(o, context, reset, line, len); - fputs("~\n", o->file); - } else { - /* - * Skip the prefix character, if any. With - * diff_suppress_blank_empty, there may be - * none. - */ - if (line[0] != '\n') { - line++; - len--; - } - emit_line(o, context, reset, line, len); - } + emit_diff_symbol(o, s, line, len, 0); return; } @@@ -2102,8 -1368,8 +2103,8 @@@ default: /* incomplete line at the end */ ecbdata->lno_in_preimage++; - emit_line(o, diff_get_color(ecbdata->color_diff, DIFF_CONTEXT), - reset, line, len); + emit_diff_symbol(o, DIFF_SYMBOL_CONTEXT_INCOMPLETE, + line, len, 0); break; } } @@@ -2248,14 -1514,20 +2249,14 @@@ static int scale_linear(int it, int wid return 1 + (it * (width - 1) / max_change); } -static void show_name(FILE *file, - const char *prefix, const char *name, int len) -{ - fprintf(file, " %s%-*s |", prefix, len, name); -} - -static void show_graph(FILE *file, char ch, int cnt, const char *set, const char *reset) +static void show_graph(struct strbuf *out, char ch, int cnt, + const char *set, const char *reset) { if (cnt <= 0) return; - fprintf(file, "%s", set); - while (cnt--) - putc(ch, file); - fprintf(file, "%s", reset); + strbuf_addstr(out, set); + strbuf_addchars(out, ch, cnt); + strbuf_addstr(out, reset); } static void fill_print_name(struct diffstat_file *file) @@@ -2279,16 -1551,14 +2280,16 @@@ file->print_name = pname; } -int print_stat_summary(FILE *fp, int files, int insertions, int deletions) +static void print_stat_summary_inserts_deletes(struct diff_options *options, + int files, int insertions, int deletions) { struct strbuf sb = STRBUF_INIT; - int ret; if (!files) { assert(insertions == 0 && deletions == 0); - return fprintf(fp, "%s\n", " 0 files changed"); + emit_diff_symbol(options, DIFF_SYMBOL_STATS_SUMMARY_NO_FILES, + NULL, 0, 0); + return; } strbuf_addf(&sb, @@@ -2315,19 -1585,9 +2316,19 @@@ deletions); } strbuf_addch(&sb, '\n'); - ret = fputs(sb.buf, fp); + emit_diff_symbol(options, DIFF_SYMBOL_STATS_SUMMARY_INSERTS_DELETES, + sb.buf, sb.len, 0); strbuf_release(&sb); - return ret; +} + +void print_stat_summary(FILE *fp, int files, + int insertions, int deletions) +{ + struct diff_options o; + memset(&o, 0, sizeof(o)); + o.file = fp; + + print_stat_summary_inserts_deletes(&o, files, insertions, deletions); } static void show_stats(struct diffstat_t *data, struct diff_options *options) @@@ -2337,13 -1597,13 +2338,13 @@@ int total_files = data->nr, count; int width, name_width, graph_width, number_width = 0, bin_width = 0; const char *reset, *add_c, *del_c; - const char *line_prefix = ""; int extra_shown = 0; + const char *line_prefix = diff_line_prefix(options); + struct strbuf out = STRBUF_INIT; if (data->nr == 0) return; - line_prefix = diff_line_prefix(options); count = options->stat_count ? options->stat_count : data->nr; reset = diff_get_color_opt(options, DIFF_RESET); @@@ -2497,32 -1757,26 +2498,32 @@@ } if (file->is_binary) { - fprintf(options->file, "%s", line_prefix); - show_name(options->file, prefix, name, len); - fprintf(options->file, " %*s", number_width, "Bin"); + strbuf_addf(&out, " %s%-*s |", prefix, len, name); + strbuf_addf(&out, " %*s", number_width, "Bin"); if (!added && !deleted) { - putc('\n', options->file); + strbuf_addch(&out, '\n'); + emit_diff_symbol(options, DIFF_SYMBOL_STATS_LINE, + out.buf, out.len, 0); + strbuf_reset(&out); continue; } - fprintf(options->file, " %s%"PRIuMAX"%s", + strbuf_addf(&out, " %s%"PRIuMAX"%s", del_c, deleted, reset); - fprintf(options->file, " -> "); - fprintf(options->file, "%s%"PRIuMAX"%s", + strbuf_addstr(&out, " -> "); + strbuf_addf(&out, "%s%"PRIuMAX"%s", add_c, added, reset); - fprintf(options->file, " bytes"); - fprintf(options->file, "\n"); + strbuf_addstr(&out, " bytes\n"); + emit_diff_symbol(options, DIFF_SYMBOL_STATS_LINE, + out.buf, out.len, 0); + strbuf_reset(&out); continue; } else if (file->is_unmerged) { - fprintf(options->file, "%s", line_prefix); - show_name(options->file, prefix, name, len); - fprintf(options->file, " Unmerged\n"); + strbuf_addf(&out, " %s%-*s |", prefix, len, name); + strbuf_addstr(&out, " Unmerged\n"); + emit_diff_symbol(options, DIFF_SYMBOL_STATS_LINE, + out.buf, out.len, 0); + strbuf_reset(&out); continue; } @@@ -2545,16 -1799,14 +2546,16 @@@ add = total - del; } } - fprintf(options->file, "%s", line_prefix); - show_name(options->file, prefix, name, len); - fprintf(options->file, " %*"PRIuMAX"%s", + strbuf_addf(&out, " %s%-*s |", prefix, len, name); + strbuf_addf(&out, " %*"PRIuMAX"%s", number_width, added + deleted, added + deleted ? " " : ""); - show_graph(options->file, '+', add, add_c, reset); - show_graph(options->file, '-', del, del_c, reset); - fprintf(options->file, "\n"); + show_graph(&out, '+', add, add_c, reset); + show_graph(&out, '-', del, del_c, reset); + strbuf_addch(&out, '\n'); + emit_diff_symbol(options, DIFF_SYMBOL_STATS_LINE, + out.buf, out.len, 0); + strbuf_reset(&out); } for (i = 0; i < data->nr; i++) { @@@ -2575,13 -1827,11 +2576,13 @@@ if (i < count) continue; if (!extra_shown) - fprintf(options->file, "%s ...\n", line_prefix); + emit_diff_symbol(options, + DIFF_SYMBOL_STATS_SUMMARY_ABBREV, + NULL, 0, 0); extra_shown = 1; } - fprintf(options->file, "%s", line_prefix); - print_stat_summary(options->file, total_files, adds, dels); + + print_stat_summary_inserts_deletes(options, total_files, adds, dels); } static void show_shortstats(struct diffstat_t *data, struct diff_options *options) @@@ -2593,7 -1843,7 +2594,7 @@@ for (i = 0; i < data->nr; i++) { int added = data->files[i]->added; - int deleted= data->files[i]->deleted; + int deleted = data->files[i]->deleted; if (data->files[i]->is_unmerged || (!data->files[i]->is_interesting && (added + deleted == 0))) { @@@ -2603,7 -1853,8 +2604,7 @@@ dels += deleted; } } - fprintf(options->file, "%s", diff_line_prefix(options)); - print_stat_summary(options->file, total_files, adds, dels); + print_stat_summary_inserts_deletes(options, total_files, adds, dels); } static void show_numstat(struct diffstat_t *data, struct diff_options *options) @@@ -2967,8 -2218,8 +2968,8 @@@ static unsigned char *deflate_it(char * return deflated; } -static void emit_binary_diff_body(FILE *file, mmfile_t *one, mmfile_t *two, - const char *prefix) +static void emit_binary_diff_body(struct diff_options *o, + mmfile_t *one, mmfile_t *two) { void *cp; void *delta; @@@ -2997,18 -2248,13 +2998,18 @@@ } if (delta && delta_size < deflate_size) { - fprintf(file, "%sdelta %lu\n", prefix, orig_size); + char *s = xstrfmt("%lu", orig_size); + emit_diff_symbol(o, DIFF_SYMBOL_BINARY_DIFF_HEADER_DELTA, + s, strlen(s), 0); + free(s); free(deflated); data = delta; data_size = delta_size; - } - else { - fprintf(file, "%sliteral %lu\n", prefix, two->size); + } else { + char *s = xstrfmt("%lu", two->size); + emit_diff_symbol(o, DIFF_SYMBOL_BINARY_DIFF_HEADER_LITERAL, + s, strlen(s), 0); + free(s); free(delta); data = deflated; data_size = deflate_size; @@@ -3017,9 -2263,8 +3018,9 @@@ /* emit data encoded in base85 */ cp = data; while (data_size) { + int len; int bytes = (52 < data_size) ? 52 : data_size; - char line[70]; + char line[71]; data_size -= bytes; if (bytes <= 26) line[0] = bytes + 'A' - 1; @@@ -3027,24 -2272,20 +3028,24 @@@ line[0] = bytes - 26 + 'a' - 1; encode_85(line + 1, cp, bytes); cp = (char *) cp + bytes; - fprintf(file, "%s", prefix); - fputs(line, file); - fputc('\n', file); + + len = strlen(line); + line[len++] = '\n'; + line[len] = '\0'; + + emit_diff_symbol(o, DIFF_SYMBOL_BINARY_DIFF_BODY, + line, len, 0); } - fprintf(file, "%s\n", prefix); + emit_diff_symbol(o, DIFF_SYMBOL_BINARY_DIFF_FOOTER, NULL, 0, 0); free(data); } -static void emit_binary_diff(FILE *file, mmfile_t *one, mmfile_t *two, - const char *prefix) +static void emit_binary_diff(struct diff_options *o, + mmfile_t *one, mmfile_t *two) { - fprintf(file, "%sGIT binary patch\n", prefix); - emit_binary_diff_body(file, one, two, prefix); - emit_binary_diff_body(file, two, one, prefix); + emit_diff_symbol(o, DIFF_SYMBOL_BINARY_DIFF_HEADER, NULL, 0, 0); + emit_binary_diff_body(o, one, two); + emit_binary_diff_body(o, two, one); } int diff_filespec_is_binary(struct diff_filespec *one) @@@ -3121,16 -2362,24 +3122,16 @@@ static void builtin_diff(const char *na if (o->submodule_format == DIFF_SUBMODULE_LOG && (!one->mode || S_ISGITLINK(one->mode)) && (!two->mode || S_ISGITLINK(two->mode))) { - const char *del = diff_get_color_opt(o, DIFF_FILE_OLD); - const char *add = diff_get_color_opt(o, DIFF_FILE_NEW); - show_submodule_summary(o->file, one->path ? one->path : two->path, - line_prefix, + show_submodule_summary(o, one->path ? one->path : two->path, &one->oid, &two->oid, - two->dirty_submodule, - meta, del, add, reset); + two->dirty_submodule); return; } else if (o->submodule_format == DIFF_SUBMODULE_INLINE_DIFF && (!one->mode || S_ISGITLINK(one->mode)) && (!two->mode || S_ISGITLINK(two->mode))) { - const char *del = diff_get_color_opt(o, DIFF_FILE_OLD); - const char *add = diff_get_color_opt(o, DIFF_FILE_NEW); - show_submodule_inline_diff(o->file, one->path ? one->path : two->path, - line_prefix, + show_submodule_inline_diff(o, one->path ? one->path : two->path, &one->oid, &two->oid, - two->dirty_submodule, - meta, del, add, reset, o); + two->dirty_submodule); return; } @@@ -3179,8 -2428,7 +3180,8 @@@ if (complete_rewrite && (textconv_one || !diff_filespec_is_binary(one)) && (textconv_two || !diff_filespec_is_binary(two))) { - fprintf(o->file, "%s", header.buf); + emit_diff_symbol(o, DIFF_SYMBOL_HEADER, + header.buf, header.len, 0); strbuf_reset(&header); emit_rewrite_diff(name_a, name_b, one, two, textconv_one, textconv_two, o); @@@ -3190,31 -2438,23 +3191,31 @@@ } if (o->irreversible_delete && lbl[1][0] == '/') { - fprintf(o->file, "%s", header.buf); + emit_diff_symbol(o, DIFF_SYMBOL_HEADER, header.buf, + header.len, 0); strbuf_reset(&header); goto free_ab_and_return; } else if (!DIFF_OPT_TST(o, TEXT) && ( (!textconv_one && diff_filespec_is_binary(one)) || (!textconv_two && diff_filespec_is_binary(two)) )) { + struct strbuf sb = STRBUF_INIT; if (!one->data && !two->data && S_ISREG(one->mode) && S_ISREG(two->mode) && !DIFF_OPT_TST(o, BINARY)) { if (!oidcmp(&one->oid, &two->oid)) { if (must_show_header) - fprintf(o->file, "%s", header.buf); + emit_diff_symbol(o, DIFF_SYMBOL_HEADER, + header.buf, header.len, + 0); goto free_ab_and_return; } - fprintf(o->file, "%s", header.buf); - fprintf(o->file, "%sBinary files %s and %s differ\n", - line_prefix, lbl[0], lbl[1]); + emit_diff_symbol(o, DIFF_SYMBOL_HEADER, + header.buf, header.len, 0); + strbuf_addf(&sb, "%sBinary files %s and %s differ\n", + diff_line_prefix(o), lbl[0], lbl[1]); + emit_diff_symbol(o, DIFF_SYMBOL_BINARY_FILES, + sb.buf, sb.len, 0); + strbuf_release(&sb); goto free_ab_and_return; } if (fill_mmfile(&mf1, one) < 0 || fill_mmfile(&mf2, two) < 0) @@@ -3223,21 -2463,16 +3224,21 @@@ if (mf1.size == mf2.size && !memcmp(mf1.ptr, mf2.ptr, mf1.size)) { if (must_show_header) - fprintf(o->file, "%s", header.buf); + emit_diff_symbol(o, DIFF_SYMBOL_HEADER, + header.buf, header.len, 0); goto free_ab_and_return; } - fprintf(o->file, "%s", header.buf); + emit_diff_symbol(o, DIFF_SYMBOL_HEADER, header.buf, header.len, 0); strbuf_reset(&header); if (DIFF_OPT_TST(o, BINARY)) - emit_binary_diff(o->file, &mf1, &mf2, line_prefix); - else - fprintf(o->file, "%sBinary files %s and %s differ\n", - line_prefix, lbl[0], lbl[1]); + emit_binary_diff(o, &mf1, &mf2); + else { + strbuf_addf(&sb, "%sBinary files %s and %s differ\n", + diff_line_prefix(o), lbl[0], lbl[1]); + emit_diff_symbol(o, DIFF_SYMBOL_BINARY_FILES, + sb.buf, sb.len, 0); + strbuf_release(&sb); + } o->found_changes = 1; } else { /* Crazy xdl interfaces.. */ @@@ -3249,8 -2484,7 +3250,8 @@@ const struct userdiff_funcname *pe; if (must_show_header) { - fprintf(o->file, "%s", header.buf); + emit_diff_symbol(o, DIFF_SYMBOL_HEADER, + header.buf, header.len, 0); strbuf_reset(&header); } @@@ -4008,7 -3242,7 +4009,7 @@@ static void diff_fill_oid_info(struct d } if (lstat(one->path, &st) < 0) die_errno("stat '%s'", one->path); - if (index_path(one->oid.hash, one->path, &st, 0)) + if (index_path(&one->oid, one->path, &st, 0)) die("cannot hash %s", one->path); } } @@@ -4041,8 -3275,8 +4042,8 @@@ static void run_diff(struct diff_filepa const char *other; const char *attr_path; - name = p->one->path; - other = (strcmp(name, p->two->path) ? p->two->path : NULL); + name = one->path; + other = (strcmp(name, two->path) ? two->path : NULL); attr_path = name; if (o->prefix_length) strip_prefix(o->prefix_length, &name, &other); @@@ -4165,8 -3399,6 +4166,8 @@@ void diff_setup(struct diff_options *op options->a_prefix = "a/"; options->b_prefix = "b/"; } + + options->color_moved = diff_color_moved_default; } void diff_setup_done(struct diff_options *options) @@@ -4276,9 -3508,6 +4277,9 @@@ if (DIFF_OPT_TST(options, FOLLOW_RENAMES) && options->pathspec.nr != 1) die(_("--follow requires exactly one pathspec")); + + if (!options->use_color || external_diff()) + options->color_moved = 0; } static int opt_arg(const char *arg, int arg_short, const char *arg_long, int *val) @@@ -4703,19 -3932,7 +4704,19 @@@ int diff_opt_parse(struct diff_options } else if (!strcmp(arg, "--no-color")) options->use_color = 0; - else if (!strcmp(arg, "--color-words")) { + else if (!strcmp(arg, "--color-moved")) { + if (diff_color_moved_default) + options->color_moved = diff_color_moved_default; + if (options->color_moved == COLOR_MOVED_NO) + options->color_moved = COLOR_MOVED_DEFAULT; + } else if (!strcmp(arg, "--no-color-moved")) + options->color_moved = COLOR_MOVED_NO; + else if (skip_prefix(arg, "--color-moved=", &arg)) { + int cm = parse_color_moved(arg); + if (cm < 0) + die("bad --color-moved argument: %s", arg); + options->color_moved = cm; + } else if (!strcmp(arg, "--color-words")) { options->use_color = 1; options->word_diff = DIFF_WORDS_COLOR; } @@@ -5245,76 -4462,67 +5246,76 @@@ static void flush_one_pair(struct diff_ } } -static void show_file_mode_name(FILE *file, const char *newdelete, struct diff_filespec *fs) +static void show_file_mode_name(struct diff_options *opt, const char *newdelete, struct diff_filespec *fs) { + struct strbuf sb = STRBUF_INIT; if (fs->mode) - fprintf(file, " %s mode %06o ", newdelete, fs->mode); + strbuf_addf(&sb, " %s mode %06o ", newdelete, fs->mode); else - fprintf(file, " %s ", newdelete); - write_name_quoted(fs->path, file, '\n'); -} + strbuf_addf(&sb, " %s ", newdelete); + quote_c_style(fs->path, &sb, NULL, 0); + strbuf_addch(&sb, '\n'); + emit_diff_symbol(opt, DIFF_SYMBOL_SUMMARY, + sb.buf, sb.len, 0); + strbuf_release(&sb); +} -static void show_mode_change(FILE *file, struct diff_filepair *p, int show_name, - const char *line_prefix) +static void show_mode_change(struct diff_options *opt, struct diff_filepair *p, + int show_name) { if (p->one->mode && p->two->mode && p->one->mode != p->two->mode) { - fprintf(file, "%s mode change %06o => %06o%c", line_prefix, p->one->mode, - p->two->mode, show_name ? ' ' : '\n'); + struct strbuf sb = STRBUF_INIT; + strbuf_addf(&sb, " mode change %06o => %06o", + p->one->mode, p->two->mode); if (show_name) { - write_name_quoted(p->two->path, file, '\n'); + strbuf_addch(&sb, ' '); + quote_c_style(p->two->path, &sb, NULL, 0); } + emit_diff_symbol(opt, DIFF_SYMBOL_SUMMARY, + sb.buf, sb.len, 0); + strbuf_release(&sb); } } -static void show_rename_copy(FILE *file, const char *renamecopy, struct diff_filepair *p, - const char *line_prefix) +static void show_rename_copy(struct diff_options *opt, const char *renamecopy, + struct diff_filepair *p) { + struct strbuf sb = STRBUF_INIT; char *names = pprint_rename(p->one->path, p->two->path); - - fprintf(file, " %s %s (%d%%)\n", renamecopy, names, similarity_index(p)); + strbuf_addf(&sb, " %s %s (%d%%)\n", + renamecopy, names, similarity_index(p)); free(names); - show_mode_change(file, p, 0, line_prefix); + emit_diff_symbol(opt, DIFF_SYMBOL_SUMMARY, + sb.buf, sb.len, 0); + show_mode_change(opt, p, 0); } static void diff_summary(struct diff_options *opt, struct diff_filepair *p) { - FILE *file = opt->file; - const char *line_prefix = diff_line_prefix(opt); - switch(p->status) { case DIFF_STATUS_DELETED: - fputs(line_prefix, file); - show_file_mode_name(file, "delete", p->one); + show_file_mode_name(opt, "delete", p->one); break; case DIFF_STATUS_ADDED: - fputs(line_prefix, file); - show_file_mode_name(file, "create", p->two); + show_file_mode_name(opt, "create", p->two); break; case DIFF_STATUS_COPIED: - fputs(line_prefix, file); - show_rename_copy(file, "copy", p, line_prefix); + show_rename_copy(opt, "copy", p); break; case DIFF_STATUS_RENAMED: - fputs(line_prefix, file); - show_rename_copy(file, "rename", p, line_prefix); + show_rename_copy(opt, "rename", p); break; default: if (p->score) { - fprintf(file, "%s rewrite ", line_prefix); - write_name_quoted(p->two->path, file, ' '); - fprintf(file, "(%d%%)\n", similarity_index(p)); + struct strbuf sb = STRBUF_INIT; + strbuf_addstr(&sb, " rewrite "); + quote_c_style(p->two->path, &sb, NULL, 0); + strbuf_addf(&sb, " (%d%%)\n", similarity_index(p)); + emit_diff_symbol(opt, DIFF_SYMBOL_SUMMARY, + sb.buf, sb.len, 0); } - show_mode_change(file, p, !p->score, line_prefix); + show_mode_change(opt, p, !p->score); break; } } @@@ -5519,51 -4727,6 +5520,51 @@@ void diff_warn_rename_limit(const char warning(_(rename_limit_advice), varname, needed); } +static void diff_flush_patch_all_file_pairs(struct diff_options *o) +{ + int i; + static struct emitted_diff_symbols esm = EMITTED_DIFF_SYMBOLS_INIT; + struct diff_queue_struct *q = &diff_queued_diff; + + if (WSEH_NEW & WS_RULE_MASK) + die("BUG: WS rules bit mask overlaps with diff symbol flags"); + + if (o->color_moved) + o->emitted_symbols = &esm; + + for (i = 0; i < q->nr; i++) { + struct diff_filepair *p = q->queue[i]; + if (check_pair_status(p)) + diff_flush_patch(p, o); + } + + if (o->emitted_symbols) { + if (o->color_moved) { + struct hashmap add_lines, del_lines; + + hashmap_init(&del_lines, + (hashmap_cmp_fn)moved_entry_cmp, o, 0); + hashmap_init(&add_lines, + (hashmap_cmp_fn)moved_entry_cmp, o, 0); + + add_lines_to_move_detection(o, &add_lines, &del_lines); + mark_color_as_moved(o, &add_lines, &del_lines); + if (o->color_moved == COLOR_MOVED_ZEBRA_DIM) + dim_moved_lines(o); + + hashmap_free(&add_lines, 0); + hashmap_free(&del_lines, 0); + } + + for (i = 0; i < esm.nr; i++) + emit_diff_symbol_from_struct(o, &esm.buf[i]); + + for (i = 0; i < esm.nr; i++) + free((void *)esm.buf[i].line); + } + esm.nr = 0; +} + void diff_flush(struct diff_options *options) { struct diff_queue_struct *q = &diff_queued_diff; @@@ -5636,7 -4799,6 +5637,7 @@@ fclose(options->file); options->file = xfopen("/dev/null", "w"); options->close_file = 1; + options->color_moved = 0; for (i = 0; i < q->nr; i++) { struct diff_filepair *p = q->queue[i]; if (check_pair_status(p)) @@@ -5648,14 -4810,20 +5649,14 @@@ if (output_format & DIFF_FORMAT_PATCH) { if (separator) { - fprintf(options->file, "%s%c", - diff_line_prefix(options), - options->line_termination); - if (options->stat_sep) { + emit_diff_symbol(options, DIFF_SYMBOL_SEPARATOR, NULL, 0, 0); + if (options->stat_sep) /* attach patch instead of inline */ - fputs(options->stat_sep, options->file); - } + emit_diff_symbol(options, DIFF_SYMBOL_STAT_SEP, + NULL, 0, 0); } - for (i = 0; i < q->nr; i++) { - struct diff_filepair *p = q->queue[i]; - if (check_pair_status(p)) - diff_flush_patch(p, options); - } + diff_flush_patch_all_file_pairs(options); } if (output_format & DIFF_FORMAT_CALLBACK) diff --combined http.c index fa8666a21f,59bf8833af..9e40a465fd --- a/http.c +++ b/http.c @@@ -11,6 -11,7 +11,7 @@@ #include "pkt-line.h" #include "gettext.h" #include "transport.h" + #include "packfile.h" static struct trace_key trace_curl = TRACE_KEY_INIT(CURL); #if LIBCURL_VERSION_NUM >= 0x070a08 @@@ -91,7 -92,7 +92,7 @@@ static struct * here, too */ }; -#if LIBCURL_VERSION_NUM >= 0x071600 +#ifdef CURLGSSAPI_DELEGATION_FLAG static const char *curl_deleg; static struct { const char *name; @@@ -352,7 -353,7 +353,7 @@@ static int http_options(const char *var } if (!strcmp("http.delegation", var)) { -#if LIBCURL_VERSION_NUM >= 0x071600 +#ifdef CURLGSSAPI_DELEGATION_FLAG return git_config_string(&curl_deleg, var, value); #else warning(_("Delegation control is not supported with cURL < 7.22.0")); @@@ -677,7 -678,6 +678,7 @@@ void setup_curl_trace(CURL *handle curl_easy_setopt(handle, CURLOPT_DEBUGDATA, NULL); } +#ifdef CURLPROTO_HTTP static long get_curl_allowed_protocols(int from_user) { long allowed_protocols = 0; @@@ -693,7 -693,6 +694,7 @@@ return allowed_protocols; } +#endif static CURL *get_curl_handle(void) { @@@ -719,7 -718,7 +720,7 @@@ curl_easy_setopt(result, CURLOPT_HTTPAUTH, CURLAUTH_ANY); #endif -#if LIBCURL_VERSION_NUM >= 0x071600 +#ifdef CURLGSSAPI_DELEGATION_FLAG if (curl_deleg) { int i; for (i = 0; i < ARRAY_SIZE(curl_deleg_levels); i++) { @@@ -792,7 -791,7 +793,7 @@@ #elif LIBCURL_VERSION_NUM >= 0x071101 curl_easy_setopt(result, CURLOPT_POST301, 1); #endif -#if LIBCURL_VERSION_NUM >= 0x071304 +#ifdef CURLPROTO_HTTP curl_easy_setopt(result, CURLOPT_REDIR_PROTOCOLS, get_curl_allowed_protocols(0)); curl_easy_setopt(result, CURLOPT_PROTOCOLS, diff --combined sha1_file.c index 73a4a0c98e,bb0831b4c8..f56bb5cae7 --- a/sha1_file.c +++ b/sha1_file.c @@@ -28,9 -28,7 +28,7 @@@ #include "list.h" #include "mergesort.h" #include "quote.h" - - #define SZ_FMT PRIuMAX - static inline uintmax_t sz_fmt(size_t s) { return s; } + #include "packfile.h" const unsigned char null_sha1[20]; const struct object_id null_oid; @@@ -278,28 -276,6 +276,6 @@@ static const char *alt_sha1_path(struc return buf->buf; } - char *odb_pack_name(struct strbuf *buf, - const unsigned char *sha1, - const char *ext) - { - strbuf_reset(buf); - strbuf_addf(buf, "%s/pack/pack-%s.%s", get_object_directory(), - sha1_to_hex(sha1), ext); - return buf->buf; - } - - char *sha1_pack_name(const unsigned char *sha1) - { - static struct strbuf buf = STRBUF_INIT; - return odb_pack_name(&buf, sha1, "pack"); - } - - char *sha1_pack_index_name(const unsigned char *sha1) - { - static struct strbuf buf = STRBUF_INIT; - return odb_pack_name(&buf, sha1, "idx"); - } - struct alternate_object_database *alt_odb_list; static struct alternate_object_database **alt_odb_tail; @@@ -705,213 -681,6 +681,6 @@@ static int has_loose_object(const unsig return check_and_freshen(sha1, 0); } - static unsigned int pack_used_ctr; - static unsigned int pack_mmap_calls; - static unsigned int peak_pack_open_windows; - static unsigned int pack_open_windows; - static unsigned int pack_open_fds; - static unsigned int pack_max_fds; - static size_t peak_pack_mapped; - static size_t pack_mapped; - struct packed_git *packed_git; - - static struct mru packed_git_mru_storage; - struct mru *packed_git_mru = &packed_git_mru_storage; - - void pack_report(void) - { - fprintf(stderr, - "pack_report: getpagesize() = %10" SZ_FMT "\n" - "pack_report: core.packedGitWindowSize = %10" SZ_FMT "\n" - "pack_report: core.packedGitLimit = %10" SZ_FMT "\n", - sz_fmt(getpagesize()), - sz_fmt(packed_git_window_size), - sz_fmt(packed_git_limit)); - fprintf(stderr, - "pack_report: pack_used_ctr = %10u\n" - "pack_report: pack_mmap_calls = %10u\n" - "pack_report: pack_open_windows = %10u / %10u\n" - "pack_report: pack_mapped = " - "%10" SZ_FMT " / %10" SZ_FMT "\n", - pack_used_ctr, - pack_mmap_calls, - pack_open_windows, peak_pack_open_windows, - sz_fmt(pack_mapped), sz_fmt(peak_pack_mapped)); - } - - /* - * Open and mmap the index file at path, perform a couple of - * consistency checks, then record its information to p. Return 0 on - * success. - */ - static int check_packed_git_idx(const char *path, struct packed_git *p) - { - void *idx_map; - struct pack_idx_header *hdr; - size_t idx_size; - uint32_t version, nr, i, *index; - int fd = git_open(path); - struct stat st; - - if (fd < 0) - return -1; - if (fstat(fd, &st)) { - close(fd); - return -1; - } - idx_size = xsize_t(st.st_size); - if (idx_size < 4 * 256 + 20 + 20) { - close(fd); - return error("index file %s is too small", path); - } - idx_map = xmmap(NULL, idx_size, PROT_READ, MAP_PRIVATE, fd, 0); - close(fd); - - hdr = idx_map; - if (hdr->idx_signature == htonl(PACK_IDX_SIGNATURE)) { - version = ntohl(hdr->idx_version); - if (version < 2 || version > 2) { - munmap(idx_map, idx_size); - return error("index file %s is version %"PRIu32 - " and is not supported by this binary" - " (try upgrading GIT to a newer version)", - path, version); - } - } else - version = 1; - - nr = 0; - index = idx_map; - if (version > 1) - index += 2; /* skip index header */ - for (i = 0; i < 256; i++) { - uint32_t n = ntohl(index[i]); - if (n < nr) { - munmap(idx_map, idx_size); - return error("non-monotonic index %s", path); - } - nr = n; - } - - if (version == 1) { - /* - * Total size: - * - 256 index entries 4 bytes each - * - 24-byte entries * nr (20-byte sha1 + 4-byte offset) - * - 20-byte SHA1 of the packfile - * - 20-byte SHA1 file checksum - */ - if (idx_size != 4*256 + nr * 24 + 20 + 20) { - munmap(idx_map, idx_size); - return error("wrong index v1 file size in %s", path); - } - } else if (version == 2) { - /* - * Minimum size: - * - 8 bytes of header - * - 256 index entries 4 bytes each - * - 20-byte sha1 entry * nr - * - 4-byte crc entry * nr - * - 4-byte offset entry * nr - * - 20-byte SHA1 of the packfile - * - 20-byte SHA1 file checksum - * And after the 4-byte offset table might be a - * variable sized table containing 8-byte entries - * for offsets larger than 2^31. - */ - unsigned long min_size = 8 + 4*256 + nr*(20 + 4 + 4) + 20 + 20; - unsigned long max_size = min_size; - if (nr) - max_size += (nr - 1)*8; - if (idx_size < min_size || idx_size > max_size) { - munmap(idx_map, idx_size); - return error("wrong index v2 file size in %s", path); - } - if (idx_size != min_size && - /* - * make sure we can deal with large pack offsets. - * 31-bit signed offset won't be enough, neither - * 32-bit unsigned one will be. - */ - (sizeof(off_t) <= 4)) { - munmap(idx_map, idx_size); - return error("pack too large for current definition of off_t in %s", path); - } - } - - p->index_version = version; - p->index_data = idx_map; - p->index_size = idx_size; - p->num_objects = nr; - return 0; - } - - int open_pack_index(struct packed_git *p) - { - char *idx_name; - size_t len; - int ret; - - if (p->index_data) - return 0; - - if (!strip_suffix(p->pack_name, ".pack", &len)) - die("BUG: pack_name does not end in .pack"); - idx_name = xstrfmt("%.*s.idx", (int)len, p->pack_name); - ret = check_packed_git_idx(idx_name, p); - free(idx_name); - return ret; - } - - static void scan_windows(struct packed_git *p, - struct packed_git **lru_p, - struct pack_window **lru_w, - struct pack_window **lru_l) - { - struct pack_window *w, *w_l; - - for (w_l = NULL, w = p->windows; w; w = w->next) { - if (!w->inuse_cnt) { - if (!*lru_w || w->last_used < (*lru_w)->last_used) { - *lru_p = p; - *lru_w = w; - *lru_l = w_l; - } - } - w_l = w; - } - } - - static int unuse_one_window(struct packed_git *current) - { - struct packed_git *p, *lru_p = NULL; - struct pack_window *lru_w = NULL, *lru_l = NULL; - - if (current) - scan_windows(current, &lru_p, &lru_w, &lru_l); - for (p = packed_git; p; p = p->next) - scan_windows(p, &lru_p, &lru_w, &lru_l); - if (lru_p) { - munmap(lru_w->base, lru_w->len); - pack_mapped -= lru_w->len; - if (lru_l) - lru_l->next = lru_w->next; - else - lru_p->windows = lru_w->next; - free(lru_w); - pack_open_windows--; - return 1; - } - return 0; - } - - void release_pack_memory(size_t need) - { - size_t cur = pack_mapped; - while (need >= (cur - pack_mapped) && unuse_one_window(NULL)) - ; /* nothing */ - } - static void mmap_limit_check(size_t length) { static size_t limit = 0; @@@ -950,1951 -719,360 +719,360 @@@ void *xmmap(void *start, size_t length return ret; } - void close_pack_windows(struct packed_git *p) - { - while (p->windows) { - struct pack_window *w = p->windows; - - if (w->inuse_cnt) - die("pack '%s' still has open windows to it", - p->pack_name); - munmap(w->base, w->len); - pack_mapped -= w->len; - pack_open_windows--; - p->windows = w->next; - free(w); - } - } - - static int close_pack_fd(struct packed_git *p) - { - if (p->pack_fd < 0) - return 0; - - close(p->pack_fd); - pack_open_fds--; - p->pack_fd = -1; - - return 1; - } - - static void close_pack(struct packed_git *p) - { - close_pack_windows(p); - close_pack_fd(p); - close_pack_index(p); - } - - void close_all_packs(void) - { - struct packed_git *p; - - for (p = packed_git; p; p = p->next) - if (p->do_not_close) - die("BUG: want to close pack marked 'do-not-close'"); - else - close_pack(p); - } - - /* - * The LRU pack is the one with the oldest MRU window, preferring packs - * with no used windows, or the oldest mtime if it has no windows allocated. + * With an in-core object data in "map", rehash it to make sure the + * object name actually matches "sha1" to detect object corruption. + * With "map" == NULL, try reading the object named with "sha1" using + * the streaming interface and rehash it to do the same. */ - static void find_lru_pack(struct packed_git *p, struct packed_git **lru_p, struct pack_window **mru_w, int *accept_windows_inuse) - { - struct pack_window *w, *this_mru_w; - int has_windows_inuse = 0; - - /* - * Reject this pack if it has windows and the previously selected - * one does not. If this pack does not have windows, reject - * it if the pack file is newer than the previously selected one. - */ - if (*lru_p && !*mru_w && (p->windows || p->mtime > (*lru_p)->mtime)) - return; - - for (w = this_mru_w = p->windows; w; w = w->next) { - /* - * Reject this pack if any of its windows are in use, - * but the previously selected pack did not have any - * inuse windows. Otherwise, record that this pack - * has windows in use. - */ - if (w->inuse_cnt) { - if (*accept_windows_inuse) - has_windows_inuse = 1; - else - return; - } - - if (w->last_used > this_mru_w->last_used) - this_mru_w = w; - - /* - * Reject this pack if it has windows that have been - * used more recently than the previously selected pack. - * If the previously selected pack had windows inuse and - * we have not encountered a window in this pack that is - * inuse, skip this check since we prefer a pack with no - * inuse windows to one that has inuse windows. - */ - if (*mru_w && *accept_windows_inuse == has_windows_inuse && - this_mru_w->last_used > (*mru_w)->last_used) - return; - } - - /* - * Select this pack. - */ - *mru_w = this_mru_w; - *lru_p = p; - *accept_windows_inuse = has_windows_inuse; - } - - static int close_one_pack(void) + int check_sha1_signature(const unsigned char *sha1, void *map, + unsigned long size, const char *type) { - struct packed_git *p, *lru_p = NULL; - struct pack_window *mru_w = NULL; - int accept_windows_inuse = 1; + unsigned char real_sha1[20]; + enum object_type obj_type; + struct git_istream *st; + git_SHA_CTX c; + char hdr[32]; + int hdrlen; - for (p = packed_git; p; p = p->next) { - if (p->pack_fd == -1) - continue; - find_lru_pack(p, &lru_p, &mru_w, &accept_windows_inuse); + if (map) { + hash_sha1_file(map, size, type, real_sha1); + return hashcmp(sha1, real_sha1) ? -1 : 0; } - if (lru_p) - return close_pack_fd(lru_p); + st = open_istream(sha1, &obj_type, &size, NULL); + if (!st) + return -1; - return 0; - } + /* Generate the header */ + hdrlen = xsnprintf(hdr, sizeof(hdr), "%s %lu", typename(obj_type), size) + 1; - void unuse_pack(struct pack_window **w_cursor) - { - struct pack_window *w = *w_cursor; - if (w) { - w->inuse_cnt--; - *w_cursor = NULL; - } - } + /* Sha1.. */ + git_SHA1_Init(&c); + git_SHA1_Update(&c, hdr, hdrlen); + for (;;) { + char buf[1024 * 16]; + ssize_t readlen = read_istream(st, buf, sizeof(buf)); - void close_pack_index(struct packed_git *p) - { - if (p->index_data) { - munmap((void *)p->index_data, p->index_size); - p->index_data = NULL; + if (readlen < 0) { + close_istream(st); + return -1; + } + if (!readlen) + break; + git_SHA1_Update(&c, buf, readlen); } + git_SHA1_Final(real_sha1, &c); + close_istream(st); + return hashcmp(sha1, real_sha1) ? -1 : 0; } - static unsigned int get_max_fd_limit(void) + int git_open_cloexec(const char *name, int flags) { - #ifdef RLIMIT_NOFILE - { - struct rlimit lim; + int fd; + static int o_cloexec = O_CLOEXEC; - if (!getrlimit(RLIMIT_NOFILE, &lim)) - return lim.rlim_cur; + fd = open(name, flags | o_cloexec); + if ((o_cloexec & O_CLOEXEC) && fd < 0 && errno == EINVAL) { + /* Try again w/o O_CLOEXEC: the kernel might not support it */ + o_cloexec &= ~O_CLOEXEC; + fd = open(name, flags | o_cloexec); } - #endif - #ifdef _SC_OPEN_MAX + #if defined(F_GETFD) && defined(F_SETFD) && defined(FD_CLOEXEC) { - long open_max = sysconf(_SC_OPEN_MAX); - if (0 < open_max) - return open_max; - /* - * Otherwise, we got -1 for one of the two - * reasons: - * - * (1) sysconf() did not understand _SC_OPEN_MAX - * and signaled an error with -1; or - * (2) sysconf() said there is no limit. - * - * We _could_ clear errno before calling sysconf() to - * tell these two cases apart and return a huge number - * in the latter case to let the caller cap it to a - * value that is not so selfish, but letting the - * fallback OPEN_MAX codepath take care of these cases - * is a lot simpler. - */ - } - #endif + static int fd_cloexec = FD_CLOEXEC; - #ifdef OPEN_MAX - return OPEN_MAX; - #else - return 1; /* see the caller ;-) */ + if (!o_cloexec && 0 <= fd && fd_cloexec) { + /* Opened w/o O_CLOEXEC? try with fcntl(2) to add it */ + int flags = fcntl(fd, F_GETFD); + if (fcntl(fd, F_SETFD, flags | fd_cloexec)) + fd_cloexec = 0; + } + } #endif + return fd; } /* - * Do not call this directly as this leaks p->pack_fd on error return; - * call open_packed_git() instead. + * Find "sha1" as a loose object in the local repository or in an alternate. + * Returns 0 on success, negative on failure. + * + * The "path" out-parameter will give the path of the object we found (if any). + * Note that it may point to static storage and is only valid until another + * call to sha1_file_name(), etc. */ - static int open_packed_git_1(struct packed_git *p) + static int stat_sha1_file(const unsigned char *sha1, struct stat *st, + const char **path) { - struct stat st; - struct pack_header hdr; - unsigned char sha1[20]; - unsigned char *idx_sha1; - long fd_flag; - - if (!p->index_data && open_pack_index(p)) - return error("packfile %s index unavailable", p->pack_name); + struct alternate_object_database *alt; - if (!pack_max_fds) { - unsigned int max_fds = get_max_fd_limit(); + *path = sha1_file_name(sha1); + if (!lstat(*path, st)) + return 0; - /* Save 3 for stdin/stdout/stderr, 22 for work */ - if (25 < max_fds) - pack_max_fds = max_fds - 25; - else - pack_max_fds = 1; + prepare_alt_odb(); + errno = ENOENT; + for (alt = alt_odb_list; alt; alt = alt->next) { + *path = alt_sha1_path(alt, sha1); + if (!lstat(*path, st)) + return 0; } - while (pack_max_fds <= pack_open_fds && close_one_pack()) - ; /* nothing */ - - p->pack_fd = git_open(p->pack_name); - if (p->pack_fd < 0 || fstat(p->pack_fd, &st)) - return -1; - pack_open_fds++; - - /* If we created the struct before we had the pack we lack size. */ - if (!p->pack_size) { - if (!S_ISREG(st.st_mode)) - return error("packfile %s not a regular file", p->pack_name); - p->pack_size = st.st_size; - } else if (p->pack_size != st.st_size) - return error("packfile %s size changed", p->pack_name); - - /* We leave these file descriptors open with sliding mmap; - * there is no point keeping them open across exec(), though. - */ - fd_flag = fcntl(p->pack_fd, F_GETFD, 0); - if (fd_flag < 0) - return error("cannot determine file descriptor flags"); - fd_flag |= FD_CLOEXEC; - if (fcntl(p->pack_fd, F_SETFD, fd_flag) == -1) - return error("cannot set FD_CLOEXEC"); - - /* Verify we recognize this pack file format. */ - if (read_in_full(p->pack_fd, &hdr, sizeof(hdr)) != sizeof(hdr)) - return error("file %s is far too short to be a packfile", p->pack_name); - if (hdr.hdr_signature != htonl(PACK_SIGNATURE)) - return error("file %s is not a GIT packfile", p->pack_name); - if (!pack_version_ok(hdr.hdr_version)) - return error("packfile %s is version %"PRIu32" and not" - " supported (try upgrading GIT to a newer version)", - p->pack_name, ntohl(hdr.hdr_version)); - - /* Verify the pack matches its index. */ - if (p->num_objects != ntohl(hdr.hdr_entries)) - return error("packfile %s claims to have %"PRIu32" objects" - " while index indicates %"PRIu32" objects", - p->pack_name, ntohl(hdr.hdr_entries), - p->num_objects); - if (lseek(p->pack_fd, p->pack_size - sizeof(sha1), SEEK_SET) == -1) - return error("end of packfile %s is unavailable", p->pack_name); - if (read_in_full(p->pack_fd, sha1, sizeof(sha1)) != sizeof(sha1)) - return error("packfile %s signature is unavailable", p->pack_name); - idx_sha1 = ((unsigned char *)p->index_data) + p->index_size - 40; - if (hashcmp(sha1, idx_sha1)) - return error("packfile %s does not match index", p->pack_name); - return 0; - } - - static int open_packed_git(struct packed_git *p) - { - if (!open_packed_git_1(p)) - return 0; - close_pack_fd(p); return -1; } - static int in_window(struct pack_window *win, off_t offset) + /* + * Like stat_sha1_file(), but actually open the object and return the + * descriptor. See the caveats on the "path" parameter above. + */ + static int open_sha1_file(const unsigned char *sha1, const char **path) { - /* We must promise at least 20 bytes (one hash) after the - * offset is available from this window, otherwise the offset - * is not actually in this window and a different window (which - * has that one hash excess) must be used. This is to support - * the object header and delta base parsing routines below. - */ - off_t win_off = win->offset; - return win_off <= offset - && (offset + 20) <= (win_off + win->len); - } + int fd; + struct alternate_object_database *alt; + int most_interesting_errno; - unsigned char *use_pack(struct packed_git *p, - struct pack_window **w_cursor, - off_t offset, - unsigned long *left) - { - struct pack_window *win = *w_cursor; + *path = sha1_file_name(sha1); + fd = git_open(*path); + if (fd >= 0) + return fd; + most_interesting_errno = errno; - /* Since packfiles end in a hash of their content and it's - * pointless to ask for an offset into the middle of that - * hash, and the in_window function above wouldn't match - * don't allow an offset too close to the end of the file. - */ - if (!p->pack_size && p->pack_fd == -1 && open_packed_git(p)) - die("packfile %s cannot be accessed", p->pack_name); - if (offset > (p->pack_size - 20)) - die("offset beyond end of packfile (truncated pack?)"); - if (offset < 0) - die(_("offset before end of packfile (broken .idx?)")); - - if (!win || !in_window(win, offset)) { - if (win) - win->inuse_cnt--; - for (win = p->windows; win; win = win->next) { - if (in_window(win, offset)) - break; - } - if (!win) { - size_t window_align = packed_git_window_size / 2; - off_t len; - - if (p->pack_fd == -1 && open_packed_git(p)) - die("packfile %s cannot be accessed", p->pack_name); - - win = xcalloc(1, sizeof(*win)); - win->offset = (offset / window_align) * window_align; - len = p->pack_size - win->offset; - if (len > packed_git_window_size) - len = packed_git_window_size; - win->len = (size_t)len; - pack_mapped += win->len; - while (packed_git_limit < pack_mapped - && unuse_one_window(p)) - ; /* nothing */ - win->base = xmmap(NULL, win->len, - PROT_READ, MAP_PRIVATE, - p->pack_fd, win->offset); - if (win->base == MAP_FAILED) - die_errno("packfile %s cannot be mapped", - p->pack_name); - if (!win->offset && win->len == p->pack_size - && !p->do_not_close) - close_pack_fd(p); - pack_mmap_calls++; - pack_open_windows++; - if (pack_mapped > peak_pack_mapped) - peak_pack_mapped = pack_mapped; - if (pack_open_windows > peak_pack_open_windows) - peak_pack_open_windows = pack_open_windows; - win->next = p->windows; - p->windows = win; - } - } - if (win != *w_cursor) { - win->last_used = pack_used_ctr++; - win->inuse_cnt++; - *w_cursor = win; + prepare_alt_odb(); + for (alt = alt_odb_list; alt; alt = alt->next) { + *path = alt_sha1_path(alt, sha1); + fd = git_open(*path); + if (fd >= 0) + return fd; + if (most_interesting_errno == ENOENT) + most_interesting_errno = errno; } - offset -= win->offset; - if (left) - *left = win->len - xsize_t(offset); - return win->base + offset; + errno = most_interesting_errno; + return -1; } - static struct packed_git *alloc_packed_git(int extra) + /* + * Map the loose object at "path" if it is not NULL, or the path found by + * searching for a loose object named "sha1". + */ + static void *map_sha1_file_1(const char *path, + const unsigned char *sha1, + unsigned long *size) { - struct packed_git *p = xmalloc(st_add(sizeof(*p), extra)); - memset(p, 0, sizeof(*p)); - p->pack_fd = -1; - return p; - } - - static void try_to_free_pack_memory(size_t size) - { - release_pack_memory(size); - } - - struct packed_git *add_packed_git(const char *path, size_t path_len, int local) - { - static int have_set_try_to_free_routine; - struct stat st; - size_t alloc; - struct packed_git *p; - - if (!have_set_try_to_free_routine) { - have_set_try_to_free_routine = 1; - set_try_to_free_routine(try_to_free_pack_memory); - } - - /* - * Make sure a corresponding .pack file exists and that - * the index looks sane. - */ - if (!strip_suffix_mem(path, &path_len, ".idx")) - return NULL; - - /* - * ".pack" is long enough to hold any suffix we're adding (and - * the use xsnprintf double-checks that) - */ - alloc = st_add3(path_len, strlen(".pack"), 1); - p = alloc_packed_git(alloc); - memcpy(p->pack_name, path, path_len); - - xsnprintf(p->pack_name + path_len, alloc - path_len, ".keep"); - if (!access(p->pack_name, F_OK)) - p->pack_keep = 1; - - xsnprintf(p->pack_name + path_len, alloc - path_len, ".pack"); - if (stat(p->pack_name, &st) || !S_ISREG(st.st_mode)) { - free(p); - return NULL; - } - - /* ok, it looks sane as far as we can check without - * actually mapping the pack file. - */ - p->pack_size = st.st_size; - p->pack_local = local; - p->mtime = st.st_mtime; - if (path_len < 40 || get_sha1_hex(path + path_len - 40, p->sha1)) - hashclr(p->sha1); - return p; - } - - struct packed_git *parse_pack_index(unsigned char *sha1, const char *idx_path) - { - const char *path = sha1_pack_name(sha1); - size_t alloc = st_add(strlen(path), 1); - struct packed_git *p = alloc_packed_git(alloc); - - memcpy(p->pack_name, path, alloc); /* includes NUL */ - hashcpy(p->sha1, sha1); - if (check_packed_git_idx(idx_path, p)) { - free(p); - return NULL; - } - - return p; - } - - void install_packed_git(struct packed_git *pack) - { - if (pack->pack_fd != -1) - pack_open_fds++; - - pack->next = packed_git; - packed_git = pack; - } - - void (*report_garbage)(unsigned seen_bits, const char *path); - - static void report_helper(const struct string_list *list, - int seen_bits, int first, int last) - { - if (seen_bits == (PACKDIR_FILE_PACK|PACKDIR_FILE_IDX)) - return; - - for (; first < last; first++) - report_garbage(seen_bits, list->items[first].string); - } - - static void report_pack_garbage(struct string_list *list) - { - int i, baselen = -1, first = 0, seen_bits = 0; - - if (!report_garbage) - return; - - string_list_sort(list); - - for (i = 0; i < list->nr; i++) { - const char *path = list->items[i].string; - if (baselen != -1 && - strncmp(path, list->items[first].string, baselen)) { - report_helper(list, seen_bits, first, i); - baselen = -1; - seen_bits = 0; - } - if (baselen == -1) { - const char *dot = strrchr(path, '.'); - if (!dot) { - report_garbage(PACKDIR_FILE_GARBAGE, path); - continue; - } - baselen = dot - path + 1; - first = i; - } - if (!strcmp(path + baselen, "pack")) - seen_bits |= 1; - else if (!strcmp(path + baselen, "idx")) - seen_bits |= 2; - } - report_helper(list, seen_bits, first, list->nr); - } - - static void prepare_packed_git_one(char *objdir, int local) - { - struct strbuf path = STRBUF_INIT; - size_t dirnamelen; - DIR *dir; - struct dirent *de; - struct string_list garbage = STRING_LIST_INIT_DUP; - - strbuf_addstr(&path, objdir); - strbuf_addstr(&path, "/pack"); - dir = opendir(path.buf); - if (!dir) { - if (errno != ENOENT) - error_errno("unable to open object pack directory: %s", - path.buf); - strbuf_release(&path); - return; - } - strbuf_addch(&path, '/'); - dirnamelen = path.len; - while ((de = readdir(dir)) != NULL) { - struct packed_git *p; - size_t base_len; - - if (is_dot_or_dotdot(de->d_name)) - continue; - - strbuf_setlen(&path, dirnamelen); - strbuf_addstr(&path, de->d_name); - - base_len = path.len; - if (strip_suffix_mem(path.buf, &base_len, ".idx")) { - /* Don't reopen a pack we already have. */ - for (p = packed_git; p; p = p->next) { - size_t len; - if (strip_suffix(p->pack_name, ".pack", &len) && - len == base_len && - !memcmp(p->pack_name, path.buf, len)) - break; - } - if (p == NULL && - /* - * See if it really is a valid .idx file with - * corresponding .pack file that we can map. - */ - (p = add_packed_git(path.buf, path.len, local)) != NULL) - install_packed_git(p); - } - - if (!report_garbage) - continue; - - if (ends_with(de->d_name, ".idx") || - ends_with(de->d_name, ".pack") || - ends_with(de->d_name, ".bitmap") || - ends_with(de->d_name, ".keep")) - string_list_append(&garbage, path.buf); - else - report_garbage(PACKDIR_FILE_GARBAGE, path.buf); - } - closedir(dir); - report_pack_garbage(&garbage); - string_list_clear(&garbage, 0); - strbuf_release(&path); - } - - static int approximate_object_count_valid; - - /* - * Give a fast, rough count of the number of objects in the repository. This - * ignores loose objects completely. If you have a lot of them, then either - * you should repack because your performance will be awful, or they are - * all unreachable objects about to be pruned, in which case they're not really - * interesting as a measure of repo size in the first place. - */ - unsigned long approximate_object_count(void) - { - static unsigned long count; - if (!approximate_object_count_valid) { - struct packed_git *p; - - prepare_packed_git(); - count = 0; - for (p = packed_git; p; p = p->next) { - if (open_pack_index(p)) - continue; - count += p->num_objects; - } - } - return count; - } - - static void *get_next_packed_git(const void *p) - { - return ((const struct packed_git *)p)->next; - } - - static void set_next_packed_git(void *p, void *next) - { - ((struct packed_git *)p)->next = next; - } - - static int sort_pack(const void *a_, const void *b_) - { - const struct packed_git *a = a_; - const struct packed_git *b = b_; - int st; - - /* - * Local packs tend to contain objects specific to our - * variant of the project than remote ones. In addition, - * remote ones could be on a network mounted filesystem. - * Favor local ones for these reasons. - */ - st = a->pack_local - b->pack_local; - if (st) - return -st; - - /* - * Younger packs tend to contain more recent objects, - * and more recent objects tend to get accessed more - * often. - */ - if (a->mtime < b->mtime) - return 1; - else if (a->mtime == b->mtime) - return 0; - return -1; - } - - static void rearrange_packed_git(void) - { - packed_git = llist_mergesort(packed_git, get_next_packed_git, - set_next_packed_git, sort_pack); - } - - static void prepare_packed_git_mru(void) - { - struct packed_git *p; - - mru_clear(packed_git_mru); - for (p = packed_git; p; p = p->next) - mru_append(packed_git_mru, p); - } - - static int prepare_packed_git_run_once = 0; - void prepare_packed_git(void) - { - struct alternate_object_database *alt; - - if (prepare_packed_git_run_once) - return; - prepare_packed_git_one(get_object_directory(), 1); - prepare_alt_odb(); - for (alt = alt_odb_list; alt; alt = alt->next) - prepare_packed_git_one(alt->path, 0); - rearrange_packed_git(); - prepare_packed_git_mru(); - prepare_packed_git_run_once = 1; - } - - void reprepare_packed_git(void) - { - approximate_object_count_valid = 0; - prepare_packed_git_run_once = 0; - prepare_packed_git(); - } - - static void mark_bad_packed_object(struct packed_git *p, - const unsigned char *sha1) - { - unsigned i; - for (i = 0; i < p->num_bad_objects; i++) - if (!hashcmp(sha1, p->bad_object_sha1 + GIT_SHA1_RAWSZ * i)) - return; - p->bad_object_sha1 = xrealloc(p->bad_object_sha1, - st_mult(GIT_MAX_RAWSZ, - st_add(p->num_bad_objects, 1))); - hashcpy(p->bad_object_sha1 + GIT_SHA1_RAWSZ * p->num_bad_objects, sha1); - p->num_bad_objects++; - } - - static const struct packed_git *has_packed_and_bad(const unsigned char *sha1) - { - struct packed_git *p; - unsigned i; - - for (p = packed_git; p; p = p->next) - for (i = 0; i < p->num_bad_objects; i++) - if (!hashcmp(sha1, p->bad_object_sha1 + 20 * i)) - return p; - return NULL; - } - - /* - * With an in-core object data in "map", rehash it to make sure the - * object name actually matches "sha1" to detect object corruption. - * With "map" == NULL, try reading the object named with "sha1" using - * the streaming interface and rehash it to do the same. - */ - int check_sha1_signature(const unsigned char *sha1, void *map, - unsigned long size, const char *type) - { - unsigned char real_sha1[20]; - enum object_type obj_type; - struct git_istream *st; - git_SHA_CTX c; - char hdr[32]; - int hdrlen; - - if (map) { - hash_sha1_file(map, size, type, real_sha1); - return hashcmp(sha1, real_sha1) ? -1 : 0; - } - - st = open_istream(sha1, &obj_type, &size, NULL); - if (!st) - return -1; - - /* Generate the header */ - hdrlen = xsnprintf(hdr, sizeof(hdr), "%s %lu", typename(obj_type), size) + 1; - - /* Sha1.. */ - git_SHA1_Init(&c); - git_SHA1_Update(&c, hdr, hdrlen); - for (;;) { - char buf[1024 * 16]; - ssize_t readlen = read_istream(st, buf, sizeof(buf)); - - if (readlen < 0) { - close_istream(st); - return -1; - } - if (!readlen) - break; - git_SHA1_Update(&c, buf, readlen); - } - git_SHA1_Final(real_sha1, &c); - close_istream(st); - return hashcmp(sha1, real_sha1) ? -1 : 0; - } - - int git_open_cloexec(const char *name, int flags) - { - int fd; - static int o_cloexec = O_CLOEXEC; - - fd = open(name, flags | o_cloexec); - if ((o_cloexec & O_CLOEXEC) && fd < 0 && errno == EINVAL) { - /* Try again w/o O_CLOEXEC: the kernel might not support it */ - o_cloexec &= ~O_CLOEXEC; - fd = open(name, flags | o_cloexec); - } - - #if defined(F_GETFD) && defined(F_SETFD) && defined(FD_CLOEXEC) - { - static int fd_cloexec = FD_CLOEXEC; - - if (!o_cloexec && 0 <= fd && fd_cloexec) { - /* Opened w/o O_CLOEXEC? try with fcntl(2) to add it */ - int flags = fcntl(fd, F_GETFD); - if (fcntl(fd, F_SETFD, flags | fd_cloexec)) - fd_cloexec = 0; - } - } - #endif - return fd; - } - - /* - * Find "sha1" as a loose object in the local repository or in an alternate. - * Returns 0 on success, negative on failure. - * - * The "path" out-parameter will give the path of the object we found (if any). - * Note that it may point to static storage and is only valid until another - * call to sha1_file_name(), etc. - */ - static int stat_sha1_file(const unsigned char *sha1, struct stat *st, - const char **path) - { - struct alternate_object_database *alt; - - *path = sha1_file_name(sha1); - if (!lstat(*path, st)) - return 0; - - prepare_alt_odb(); - errno = ENOENT; - for (alt = alt_odb_list; alt; alt = alt->next) { - *path = alt_sha1_path(alt, sha1); - if (!lstat(*path, st)) - return 0; - } - - return -1; - } - - /* - * Like stat_sha1_file(), but actually open the object and return the - * descriptor. See the caveats on the "path" parameter above. - */ - static int open_sha1_file(const unsigned char *sha1, const char **path) - { - int fd; - struct alternate_object_database *alt; - int most_interesting_errno; - - *path = sha1_file_name(sha1); - fd = git_open(*path); - if (fd >= 0) - return fd; - most_interesting_errno = errno; - - prepare_alt_odb(); - for (alt = alt_odb_list; alt; alt = alt->next) { - *path = alt_sha1_path(alt, sha1); - fd = git_open(*path); - if (fd >= 0) - return fd; - if (most_interesting_errno == ENOENT) - most_interesting_errno = errno; - } - errno = most_interesting_errno; - return -1; - } - - /* - * Map the loose object at "path" if it is not NULL, or the path found by - * searching for a loose object named "sha1". - */ - static void *map_sha1_file_1(const char *path, - const unsigned char *sha1, - unsigned long *size) - { - void *map; - int fd; - - if (path) - fd = git_open(path); - else - fd = open_sha1_file(sha1, &path); - map = NULL; - if (fd >= 0) { - struct stat st; - - if (!fstat(fd, &st)) { - *size = xsize_t(st.st_size); - if (!*size) { - /* mmap() is forbidden on empty files */ - error("object file %s is empty", path); - return NULL; - } - map = xmmap(NULL, *size, PROT_READ, MAP_PRIVATE, fd, 0); - } - close(fd); - } - return map; - } - - void *map_sha1_file(const unsigned char *sha1, unsigned long *size) - { - return map_sha1_file_1(NULL, sha1, size); - } - - unsigned long unpack_object_header_buffer(const unsigned char *buf, - unsigned long len, enum object_type *type, unsigned long *sizep) - { - unsigned shift; - unsigned long size, c; - unsigned long used = 0; - - c = buf[used++]; - *type = (c >> 4) & 7; - size = c & 15; - shift = 4; - while (c & 0x80) { - if (len <= used || bitsizeof(long) <= shift) { - error("bad object header"); - size = used = 0; - break; - } - c = buf[used++]; - size += (c & 0x7f) << shift; - shift += 7; - } - *sizep = size; - return used; - } - - static int unpack_sha1_short_header(git_zstream *stream, - unsigned char *map, unsigned long mapsize, - void *buffer, unsigned long bufsiz) - { - /* Get the data stream */ - memset(stream, 0, sizeof(*stream)); - stream->next_in = map; - stream->avail_in = mapsize; - stream->next_out = buffer; - stream->avail_out = bufsiz; - - git_inflate_init(stream); - return git_inflate(stream, 0); - } - - int unpack_sha1_header(git_zstream *stream, - unsigned char *map, unsigned long mapsize, - void *buffer, unsigned long bufsiz) - { - int status = unpack_sha1_short_header(stream, map, mapsize, - buffer, bufsiz); - - if (status < Z_OK) - return status; - - /* Make sure we have the terminating NUL */ - if (!memchr(buffer, '\0', stream->next_out - (unsigned char *)buffer)) - return -1; - return 0; - } - - static int unpack_sha1_header_to_strbuf(git_zstream *stream, unsigned char *map, - unsigned long mapsize, void *buffer, - unsigned long bufsiz, struct strbuf *header) - { - int status; - - status = unpack_sha1_short_header(stream, map, mapsize, buffer, bufsiz); - if (status < Z_OK) - return -1; - - /* - * Check if entire header is unpacked in the first iteration. - */ - if (memchr(buffer, '\0', stream->next_out - (unsigned char *)buffer)) - return 0; - - /* - * buffer[0..bufsiz] was not large enough. Copy the partial - * result out to header, and then append the result of further - * reading the stream. - */ - strbuf_add(header, buffer, stream->next_out - (unsigned char *)buffer); - stream->next_out = buffer; - stream->avail_out = bufsiz; - - do { - status = git_inflate(stream, 0); - strbuf_add(header, buffer, stream->next_out - (unsigned char *)buffer); - if (memchr(buffer, '\0', stream->next_out - (unsigned char *)buffer)) - return 0; - stream->next_out = buffer; - stream->avail_out = bufsiz; - } while (status != Z_STREAM_END); - return -1; - } - - static void *unpack_sha1_rest(git_zstream *stream, void *buffer, unsigned long size, const unsigned char *sha1) - { - int bytes = strlen(buffer) + 1; - unsigned char *buf = xmallocz(size); - unsigned long n; - int status = Z_OK; - - n = stream->total_out - bytes; - if (n > size) - n = size; - memcpy(buf, (char *) buffer + bytes, n); - bytes = n; - if (bytes <= size) { - /* - * The above condition must be (bytes <= size), not - * (bytes < size). In other words, even though we - * expect no more output and set avail_out to zero, - * the input zlib stream may have bytes that express - * "this concludes the stream", and we *do* want to - * eat that input. - * - * Otherwise we would not be able to test that we - * consumed all the input to reach the expected size; - * we also want to check that zlib tells us that all - * went well with status == Z_STREAM_END at the end. - */ - stream->next_out = buf + bytes; - stream->avail_out = size - bytes; - while (status == Z_OK) - status = git_inflate(stream, Z_FINISH); - } - if (status == Z_STREAM_END && !stream->avail_in) { - git_inflate_end(stream); - return buf; - } - - if (status < 0) - error("corrupt loose object '%s'", sha1_to_hex(sha1)); - else if (stream->avail_in) - error("garbage at end of loose object '%s'", - sha1_to_hex(sha1)); - free(buf); - return NULL; - } - - /* - * We used to just use "sscanf()", but that's actually way - * too permissive for what we want to check. So do an anal - * object header parse by hand. - */ - static int parse_sha1_header_extended(const char *hdr, struct object_info *oi, - unsigned int flags) - { - const char *type_buf = hdr; - unsigned long size; - int type, type_len = 0; - - /* - * The type can be of any size but is followed by - * a space. - */ - for (;;) { - char c = *hdr++; - if (!c) - return -1; - if (c == ' ') - break; - type_len++; - } - - type = type_from_string_gently(type_buf, type_len, 1); - if (oi->typename) - strbuf_add(oi->typename, type_buf, type_len); - /* - * Set type to 0 if its an unknown object and - * we're obtaining the type using '--allow-unknown-type' - * option. - */ - if ((flags & OBJECT_INFO_ALLOW_UNKNOWN_TYPE) && (type < 0)) - type = 0; - else if (type < 0) - die("invalid object type"); - if (oi->typep) - *oi->typep = type; - - /* - * The length must follow immediately, and be in canonical - * decimal format (ie "010" is not valid). - */ - size = *hdr++ - '0'; - if (size > 9) - return -1; - if (size) { - for (;;) { - unsigned long c = *hdr - '0'; - if (c > 9) - break; - hdr++; - size = size * 10 + c; - } - } - - if (oi->sizep) - *oi->sizep = size; - - /* - * The length must be followed by a zero byte - */ - return *hdr ? -1 : type; - } - - int parse_sha1_header(const char *hdr, unsigned long *sizep) - { - struct object_info oi = OBJECT_INFO_INIT; - - oi.sizep = sizep; - return parse_sha1_header_extended(hdr, &oi, 0); - } - - unsigned long get_size_from_delta(struct packed_git *p, - struct pack_window **w_curs, - off_t curpos) - { - const unsigned char *data; - unsigned char delta_head[20], *in; - git_zstream stream; - int st; - - memset(&stream, 0, sizeof(stream)); - stream.next_out = delta_head; - stream.avail_out = sizeof(delta_head); - - git_inflate_init(&stream); - do { - in = use_pack(p, w_curs, curpos, &stream.avail_in); - stream.next_in = in; - st = git_inflate(&stream, Z_FINISH); - curpos += stream.next_in - in; - } while ((st == Z_OK || st == Z_BUF_ERROR) && - stream.total_out < sizeof(delta_head)); - git_inflate_end(&stream); - if ((st != Z_STREAM_END) && stream.total_out != sizeof(delta_head)) { - error("delta data unpack-initial failed"); - return 0; - } - - /* Examine the initial part of the delta to figure out - * the result size. - */ - data = delta_head; - - /* ignore base size */ - get_delta_hdr_size(&data, delta_head+sizeof(delta_head)); - - /* Read the result size */ - return get_delta_hdr_size(&data, delta_head+sizeof(delta_head)); - } - - static off_t get_delta_base(struct packed_git *p, - struct pack_window **w_curs, - off_t *curpos, - enum object_type type, - off_t delta_obj_offset) - { - unsigned char *base_info = use_pack(p, w_curs, *curpos, NULL); - off_t base_offset; - - /* use_pack() assured us we have [base_info, base_info + 20) - * as a range that we can look at without walking off the - * end of the mapped window. Its actually the hash size - * that is assured. An OFS_DELTA longer than the hash size - * is stupid, as then a REF_DELTA would be smaller to store. - */ - if (type == OBJ_OFS_DELTA) { - unsigned used = 0; - unsigned char c = base_info[used++]; - base_offset = c & 127; - while (c & 128) { - base_offset += 1; - if (!base_offset || MSB(base_offset, 7)) - return 0; /* overflow */ - c = base_info[used++]; - base_offset = (base_offset << 7) + (c & 127); - } - base_offset = delta_obj_offset - base_offset; - if (base_offset <= 0 || base_offset >= delta_obj_offset) - return 0; /* out of bound */ - *curpos += used; - } else if (type == OBJ_REF_DELTA) { - /* The base entry _must_ be in the same pack */ - base_offset = find_pack_entry_one(base_info, p); - *curpos += 20; - } else - die("I am totally screwed"); - return base_offset; - } - - /* - * Like get_delta_base above, but we return the sha1 instead of the pack - * offset. This means it is cheaper for REF deltas (we do not have to do - * the final object lookup), but more expensive for OFS deltas (we - * have to load the revidx to convert the offset back into a sha1). - */ - static const unsigned char *get_delta_base_sha1(struct packed_git *p, - struct pack_window **w_curs, - off_t curpos, - enum object_type type, - off_t delta_obj_offset) - { - if (type == OBJ_REF_DELTA) { - unsigned char *base = use_pack(p, w_curs, curpos, NULL); - return base; - } else if (type == OBJ_OFS_DELTA) { - struct revindex_entry *revidx; - off_t base_offset = get_delta_base(p, w_curs, &curpos, - type, delta_obj_offset); - - if (!base_offset) - return NULL; - - revidx = find_pack_revindex(p, base_offset); - if (!revidx) - return NULL; - - return nth_packed_object_sha1(p, revidx->nr); - } else - return NULL; - } - - int unpack_object_header(struct packed_git *p, - struct pack_window **w_curs, - off_t *curpos, - unsigned long *sizep) - { - unsigned char *base; - unsigned long left; - unsigned long used; - enum object_type type; - - /* use_pack() assures us we have [base, base + 20) available - * as a range that we can look at. (Its actually the hash - * size that is assured.) With our object header encoding - * the maximum deflated object size is 2^137, which is just - * insane, so we know won't exceed what we have been given. - */ - base = use_pack(p, w_curs, *curpos, &left); - used = unpack_object_header_buffer(base, left, &type, sizep); - if (!used) { - type = OBJ_BAD; - } else - *curpos += used; - - return type; - } - - static int retry_bad_packed_offset(struct packed_git *p, off_t obj_offset) - { - int type; - struct revindex_entry *revidx; - const unsigned char *sha1; - revidx = find_pack_revindex(p, obj_offset); - if (!revidx) - return OBJ_BAD; - sha1 = nth_packed_object_sha1(p, revidx->nr); - mark_bad_packed_object(p, sha1); - type = sha1_object_info(sha1, NULL); - if (type <= OBJ_NONE) - return OBJ_BAD; - return type; - } - - #define POI_STACK_PREALLOC 64 - - static enum object_type packed_to_object_type(struct packed_git *p, - off_t obj_offset, - enum object_type type, - struct pack_window **w_curs, - off_t curpos) - { - off_t small_poi_stack[POI_STACK_PREALLOC]; - off_t *poi_stack = small_poi_stack; - int poi_stack_nr = 0, poi_stack_alloc = POI_STACK_PREALLOC; - - while (type == OBJ_OFS_DELTA || type == OBJ_REF_DELTA) { - off_t base_offset; - unsigned long size; - /* Push the object we're going to leave behind */ - if (poi_stack_nr >= poi_stack_alloc && poi_stack == small_poi_stack) { - poi_stack_alloc = alloc_nr(poi_stack_nr); - ALLOC_ARRAY(poi_stack, poi_stack_alloc); - memcpy(poi_stack, small_poi_stack, sizeof(off_t)*poi_stack_nr); - } else { - ALLOC_GROW(poi_stack, poi_stack_nr+1, poi_stack_alloc); - } - poi_stack[poi_stack_nr++] = obj_offset; - /* If parsing the base offset fails, just unwind */ - base_offset = get_delta_base(p, w_curs, &curpos, type, obj_offset); - if (!base_offset) - goto unwind; - curpos = obj_offset = base_offset; - type = unpack_object_header(p, w_curs, &curpos, &size); - if (type <= OBJ_NONE) { - /* If getting the base itself fails, we first - * retry the base, otherwise unwind */ - type = retry_bad_packed_offset(p, base_offset); - if (type > OBJ_NONE) - goto out; - goto unwind; - } - } - - switch (type) { - case OBJ_BAD: - case OBJ_COMMIT: - case OBJ_TREE: - case OBJ_BLOB: - case OBJ_TAG: - break; - default: - error("unknown object type %i at offset %"PRIuMAX" in %s", - type, (uintmax_t)obj_offset, p->pack_name); - type = OBJ_BAD; - } - - out: - if (poi_stack != small_poi_stack) - free(poi_stack); - return type; - - unwind: - while (poi_stack_nr) { - obj_offset = poi_stack[--poi_stack_nr]; - type = retry_bad_packed_offset(p, obj_offset); - if (type > OBJ_NONE) - goto out; - } - type = OBJ_BAD; - goto out; - } - - static struct hashmap delta_base_cache; - static size_t delta_base_cached; - - static LIST_HEAD(delta_base_cache_lru); - - struct delta_base_cache_key { - struct packed_git *p; - off_t base_offset; - }; - - struct delta_base_cache_entry { - struct hashmap hash; - struct delta_base_cache_key key; - struct list_head lru; - void *data; - unsigned long size; - enum object_type type; - }; - - static unsigned int pack_entry_hash(struct packed_git *p, off_t base_offset) - { - unsigned int hash; - - hash = (unsigned int)(intptr_t)p + (unsigned int)base_offset; - hash += (hash >> 8) + (hash >> 16); - return hash; - } - - static struct delta_base_cache_entry * - get_delta_base_cache_entry(struct packed_git *p, off_t base_offset) - { - struct hashmap_entry entry; - struct delta_base_cache_key key; - - if (!delta_base_cache.cmpfn) - return NULL; - - hashmap_entry_init(&entry, pack_entry_hash(p, base_offset)); - key.p = p; - key.base_offset = base_offset; - return hashmap_get(&delta_base_cache, &entry, &key); - } - - static int delta_base_cache_key_eq(const struct delta_base_cache_key *a, - const struct delta_base_cache_key *b) - { - return a->p == b->p && a->base_offset == b->base_offset; - } + void *map; + int fd; - static int delta_base_cache_hash_cmp(const void *unused_cmp_data, - const void *va, const void *vb, - const void *vkey) - { - const struct delta_base_cache_entry *a = va, *b = vb; - const struct delta_base_cache_key *key = vkey; - if (key) - return !delta_base_cache_key_eq(&a->key, key); + if (path) + fd = git_open(path); else - return !delta_base_cache_key_eq(&a->key, &b->key); - } - - static int in_delta_base_cache(struct packed_git *p, off_t base_offset) - { - return !!get_delta_base_cache_entry(p, base_offset); - } - - /* - * Remove the entry from the cache, but do _not_ free the associated - * entry data. The caller takes ownership of the "data" buffer, and - * should copy out any fields it wants before detaching. - */ - static void detach_delta_base_cache_entry(struct delta_base_cache_entry *ent) - { - hashmap_remove(&delta_base_cache, ent, &ent->key); - list_del(&ent->lru); - delta_base_cached -= ent->size; - free(ent); - } - - static void *cache_or_unpack_entry(struct packed_git *p, off_t base_offset, - unsigned long *base_size, enum object_type *type) - { - struct delta_base_cache_entry *ent; - - ent = get_delta_base_cache_entry(p, base_offset); - if (!ent) - return unpack_entry(p, base_offset, type, base_size); - - if (type) - *type = ent->type; - if (base_size) - *base_size = ent->size; - return xmemdupz(ent->data, ent->size); - } - - static inline void release_delta_base_cache(struct delta_base_cache_entry *ent) - { - free(ent->data); - detach_delta_base_cache_entry(ent); - } - - void clear_delta_base_cache(void) - { - struct list_head *lru, *tmp; - list_for_each_safe(lru, tmp, &delta_base_cache_lru) { - struct delta_base_cache_entry *entry = - list_entry(lru, struct delta_base_cache_entry, lru); - release_delta_base_cache(entry); - } - } - - static void add_delta_base_cache(struct packed_git *p, off_t base_offset, - void *base, unsigned long base_size, enum object_type type) - { - struct delta_base_cache_entry *ent = xmalloc(sizeof(*ent)); - struct list_head *lru, *tmp; - - delta_base_cached += base_size; - - list_for_each_safe(lru, tmp, &delta_base_cache_lru) { - struct delta_base_cache_entry *f = - list_entry(lru, struct delta_base_cache_entry, lru); - if (delta_base_cached <= delta_base_cache_limit) - break; - release_delta_base_cache(f); - } - - ent->key.p = p; - ent->key.base_offset = base_offset; - ent->type = type; - ent->data = base; - ent->size = base_size; - list_add_tail(&ent->lru, &delta_base_cache_lru); - - if (!delta_base_cache.cmpfn) - hashmap_init(&delta_base_cache, delta_base_cache_hash_cmp, NULL, 0); - hashmap_entry_init(ent, pack_entry_hash(p, base_offset)); - hashmap_add(&delta_base_cache, ent); - } - - int packed_object_info(struct packed_git *p, off_t obj_offset, - struct object_info *oi) - { - struct pack_window *w_curs = NULL; - unsigned long size; - off_t curpos = obj_offset; - enum object_type type; - - /* - * We always get the representation type, but only convert it to - * a "real" type later if the caller is interested. - */ - if (oi->contentp) { - *oi->contentp = cache_or_unpack_entry(p, obj_offset, oi->sizep, - &type); - if (!*oi->contentp) - type = OBJ_BAD; - } else { - type = unpack_object_header(p, &w_curs, &curpos, &size); - } - - if (!oi->contentp && oi->sizep) { - if (type == OBJ_OFS_DELTA || type == OBJ_REF_DELTA) { - off_t tmp_pos = curpos; - off_t base_offset = get_delta_base(p, &w_curs, &tmp_pos, - type, obj_offset); - if (!base_offset) { - type = OBJ_BAD; - goto out; - } - *oi->sizep = get_size_from_delta(p, &w_curs, tmp_pos); - if (*oi->sizep == 0) { - type = OBJ_BAD; - goto out; - } - } else { - *oi->sizep = size; - } - } - - if (oi->disk_sizep) { - struct revindex_entry *revidx = find_pack_revindex(p, obj_offset); - *oi->disk_sizep = revidx[1].offset - obj_offset; - } - - if (oi->typep || oi->typename) { - enum object_type ptot; - ptot = packed_to_object_type(p, obj_offset, type, &w_curs, - curpos); - if (oi->typep) - *oi->typep = ptot; - if (oi->typename) { - const char *tn = typename(ptot); - if (tn) - strbuf_addstr(oi->typename, tn); - } - if (ptot < 0) { - type = OBJ_BAD; - goto out; - } - } - - if (oi->delta_base_sha1) { - if (type == OBJ_OFS_DELTA || type == OBJ_REF_DELTA) { - const unsigned char *base; - - base = get_delta_base_sha1(p, &w_curs, curpos, - type, obj_offset); - if (!base) { - type = OBJ_BAD; - goto out; - } - - hashcpy(oi->delta_base_sha1, base); - } else - hashclr(oi->delta_base_sha1); - } - - oi->whence = in_delta_base_cache(p, obj_offset) ? OI_DBCACHED : - OI_PACKED; - - out: - unuse_pack(&w_curs); - return type; - } - - static void *unpack_compressed_entry(struct packed_git *p, - struct pack_window **w_curs, - off_t curpos, - unsigned long size) - { - int st; - git_zstream stream; - unsigned char *buffer, *in; - - buffer = xmallocz_gently(size); - if (!buffer) - return NULL; - memset(&stream, 0, sizeof(stream)); - stream.next_out = buffer; - stream.avail_out = size + 1; - - git_inflate_init(&stream); - do { - in = use_pack(p, w_curs, curpos, &stream.avail_in); - stream.next_in = in; - st = git_inflate(&stream, Z_FINISH); - if (!stream.avail_out) - break; /* the payload is larger than it should be */ - curpos += stream.next_in - in; - } while (st == Z_OK || st == Z_BUF_ERROR); - git_inflate_end(&stream); - if ((st != Z_STREAM_END) || stream.total_out != size) { - free(buffer); - return NULL; - } - - return buffer; - } - - static void *read_object(const unsigned char *sha1, enum object_type *type, - unsigned long *size); - - static void write_pack_access_log(struct packed_git *p, off_t obj_offset) - { - static struct trace_key pack_access = TRACE_KEY_INIT(PACK_ACCESS); - trace_printf_key(&pack_access, "%s %"PRIuMAX"\n", - p->pack_name, (uintmax_t)obj_offset); - } - - int do_check_packed_object_crc; - - #define UNPACK_ENTRY_STACK_PREALLOC 64 - struct unpack_entry_stack_ent { - off_t obj_offset; - off_t curpos; - unsigned long size; - }; - - void *unpack_entry(struct packed_git *p, off_t obj_offset, - enum object_type *final_type, unsigned long *final_size) - { - struct pack_window *w_curs = NULL; - off_t curpos = obj_offset; - void *data = NULL; - unsigned long size; - enum object_type type; - struct unpack_entry_stack_ent small_delta_stack[UNPACK_ENTRY_STACK_PREALLOC]; - struct unpack_entry_stack_ent *delta_stack = small_delta_stack; - int delta_stack_nr = 0, delta_stack_alloc = UNPACK_ENTRY_STACK_PREALLOC; - int base_from_cache = 0; - - write_pack_access_log(p, obj_offset); - - /* PHASE 1: drill down to the innermost base object */ - for (;;) { - off_t base_offset; - int i; - struct delta_base_cache_entry *ent; - - ent = get_delta_base_cache_entry(p, curpos); - if (ent) { - type = ent->type; - data = ent->data; - size = ent->size; - detach_delta_base_cache_entry(ent); - base_from_cache = 1; - break; - } - - if (do_check_packed_object_crc && p->index_version > 1) { - struct revindex_entry *revidx = find_pack_revindex(p, obj_offset); - off_t len = revidx[1].offset - obj_offset; - if (check_pack_crc(p, &w_curs, obj_offset, len, revidx->nr)) { - const unsigned char *sha1 = - nth_packed_object_sha1(p, revidx->nr); - error("bad packed object CRC for %s", - sha1_to_hex(sha1)); - mark_bad_packed_object(p, sha1); - data = NULL; - goto out; - } - } - - type = unpack_object_header(p, &w_curs, &curpos, &size); - if (type != OBJ_OFS_DELTA && type != OBJ_REF_DELTA) - break; - - base_offset = get_delta_base(p, &w_curs, &curpos, type, obj_offset); - if (!base_offset) { - error("failed to validate delta base reference " - "at offset %"PRIuMAX" from %s", - (uintmax_t)curpos, p->pack_name); - /* bail to phase 2, in hopes of recovery */ - data = NULL; - break; - } - - /* push object, proceed to base */ - if (delta_stack_nr >= delta_stack_alloc - && delta_stack == small_delta_stack) { - delta_stack_alloc = alloc_nr(delta_stack_nr); - ALLOC_ARRAY(delta_stack, delta_stack_alloc); - memcpy(delta_stack, small_delta_stack, - sizeof(*delta_stack)*delta_stack_nr); - } else { - ALLOC_GROW(delta_stack, delta_stack_nr+1, delta_stack_alloc); - } - i = delta_stack_nr++; - delta_stack[i].obj_offset = obj_offset; - delta_stack[i].curpos = curpos; - delta_stack[i].size = size; - - curpos = obj_offset = base_offset; - } - - /* PHASE 2: handle the base */ - switch (type) { - case OBJ_OFS_DELTA: - case OBJ_REF_DELTA: - if (data) - die("BUG: unpack_entry: left loop at a valid delta"); - break; - case OBJ_COMMIT: - case OBJ_TREE: - case OBJ_BLOB: - case OBJ_TAG: - if (!base_from_cache) - data = unpack_compressed_entry(p, &w_curs, curpos, size); - break; - default: - data = NULL; - error("unknown object type %i at offset %"PRIuMAX" in %s", - type, (uintmax_t)obj_offset, p->pack_name); - } - - /* PHASE 3: apply deltas in order */ - - /* invariants: - * 'data' holds the base data, or NULL if there was corruption - */ - while (delta_stack_nr) { - void *delta_data; - void *base = data; - void *external_base = NULL; - unsigned long delta_size, base_size = size; - int i; - - data = NULL; - - if (base) - add_delta_base_cache(p, obj_offset, base, base_size, type); - - if (!base) { - /* - * We're probably in deep shit, but let's try to fetch - * the required base anyway from another pack or loose. - * This is costly but should happen only in the presence - * of a corrupted pack, and is better than failing outright. - */ - struct revindex_entry *revidx; - const unsigned char *base_sha1; - revidx = find_pack_revindex(p, obj_offset); - if (revidx) { - base_sha1 = nth_packed_object_sha1(p, revidx->nr); - error("failed to read delta base object %s" - " at offset %"PRIuMAX" from %s", - sha1_to_hex(base_sha1), (uintmax_t)obj_offset, - p->pack_name); - mark_bad_packed_object(p, base_sha1); - base = read_object(base_sha1, &type, &base_size); - external_base = base; - } - } - - i = --delta_stack_nr; - obj_offset = delta_stack[i].obj_offset; - curpos = delta_stack[i].curpos; - delta_size = delta_stack[i].size; - - if (!base) - continue; - - delta_data = unpack_compressed_entry(p, &w_curs, curpos, delta_size); - - if (!delta_data) { - error("failed to unpack compressed delta " - "at offset %"PRIuMAX" from %s", - (uintmax_t)curpos, p->pack_name); - data = NULL; - free(external_base); - continue; - } - - data = patch_delta(base, base_size, - delta_data, delta_size, - &size); - - /* - * We could not apply the delta; warn the user, but keep going. - * Our failure will be noticed either in the next iteration of - * the loop, or if this is the final delta, in the caller when - * we return NULL. Those code paths will take care of making - * a more explicit warning and retrying with another copy of - * the object. - */ - if (!data) - error("failed to apply delta"); - - free(delta_data); - free(external_base); - } - - if (final_type) - *final_type = type; - if (final_size) - *final_size = size; - - out: - unuse_pack(&w_curs); - - if (delta_stack != small_delta_stack) - free(delta_stack); - - return data; - } + fd = open_sha1_file(sha1, &path); + map = NULL; + if (fd >= 0) { + struct stat st; - const unsigned char *nth_packed_object_sha1(struct packed_git *p, - uint32_t n) - { - const unsigned char *index = p->index_data; - if (!index) { - if (open_pack_index(p)) - return NULL; - index = p->index_data; - } - if (n >= p->num_objects) - return NULL; - index += 4 * 256; - if (p->index_version == 1) { - return index + 24 * n + 4; - } else { - index += 8; - return index + 20 * n; + if (!fstat(fd, &st)) { + *size = xsize_t(st.st_size); + if (!*size) { + /* mmap() is forbidden on empty files */ + error("object file %s is empty", path); + return NULL; + } + map = xmmap(NULL, *size, PROT_READ, MAP_PRIVATE, fd, 0); + } + close(fd); } + return map; } - const struct object_id *nth_packed_object_oid(struct object_id *oid, - struct packed_git *p, - uint32_t n) + void *map_sha1_file(const unsigned char *sha1, unsigned long *size) { - const unsigned char *hash = nth_packed_object_sha1(p, n); - if (!hash) - return NULL; - hashcpy(oid->hash, hash); - return oid; + return map_sha1_file_1(NULL, sha1, size); } - void check_pack_index_ptr(const struct packed_git *p, const void *vptr) + static int unpack_sha1_short_header(git_zstream *stream, + unsigned char *map, unsigned long mapsize, + void *buffer, unsigned long bufsiz) { - const unsigned char *ptr = vptr; - const unsigned char *start = p->index_data; - const unsigned char *end = start + p->index_size; - if (ptr < start) - die(_("offset before start of pack index for %s (corrupt index?)"), - p->pack_name); - /* No need to check for underflow; .idx files must be at least 8 bytes */ - if (ptr >= end - 8) - die(_("offset beyond end of pack index for %s (truncated index?)"), - p->pack_name); - } + /* Get the data stream */ + memset(stream, 0, sizeof(*stream)); + stream->next_in = map; + stream->avail_in = mapsize; + stream->next_out = buffer; + stream->avail_out = bufsiz; - off_t nth_packed_object_offset(const struct packed_git *p, uint32_t n) - { - const unsigned char *index = p->index_data; - index += 4 * 256; - if (p->index_version == 1) { - return ntohl(*((uint32_t *)(index + 24 * n))); - } else { - uint32_t off; - index += 8 + p->num_objects * (20 + 4); - off = ntohl(*((uint32_t *)(index + 4 * n))); - if (!(off & 0x80000000)) - return off; - index += p->num_objects * 4 + (off & 0x7fffffff) * 8; - check_pack_index_ptr(p, index); - return (((uint64_t)ntohl(*((uint32_t *)(index + 0)))) << 32) | - ntohl(*((uint32_t *)(index + 4))); - } + git_inflate_init(stream); + return git_inflate(stream, 0); } - off_t find_pack_entry_one(const unsigned char *sha1, - struct packed_git *p) + int unpack_sha1_header(git_zstream *stream, + unsigned char *map, unsigned long mapsize, + void *buffer, unsigned long bufsiz) { - const uint32_t *level1_ofs = p->index_data; - const unsigned char *index = p->index_data; - unsigned hi, lo, stride; - static int debug_lookup = -1; - - if (debug_lookup < 0) - debug_lookup = !!getenv("GIT_DEBUG_LOOKUP"); + int status = unpack_sha1_short_header(stream, map, mapsize, + buffer, bufsiz); - if (!index) { - if (open_pack_index(p)) - return 0; - level1_ofs = p->index_data; - index = p->index_data; - } - if (p->index_version > 1) { - level1_ofs += 2; - index += 8; - } - index += 4 * 256; - hi = ntohl(level1_ofs[*sha1]); - lo = ((*sha1 == 0x0) ? 0 : ntohl(level1_ofs[*sha1 - 1])); - if (p->index_version > 1) { - stride = 20; - } else { - stride = 24; - index += 4; - } + if (status < Z_OK) + return status; - if (debug_lookup) - printf("%02x%02x%02x... lo %u hi %u nr %"PRIu32"\n", - sha1[0], sha1[1], sha1[2], lo, hi, p->num_objects); - - while (lo < hi) { - unsigned mi = (lo + hi) / 2; - int cmp = hashcmp(index + mi * stride, sha1); - - if (debug_lookup) - printf("lo %u hi %u rg %u mi %u\n", - lo, hi, hi - lo, mi); - if (!cmp) - return nth_packed_object_offset(p, mi); - if (cmp > 0) - hi = mi; - else - lo = mi+1; - } + /* Make sure we have the terminating NUL */ + if (!memchr(buffer, '\0', stream->next_out - (unsigned char *)buffer)) + return -1; return 0; } - int is_pack_valid(struct packed_git *p) + static int unpack_sha1_header_to_strbuf(git_zstream *stream, unsigned char *map, + unsigned long mapsize, void *buffer, + unsigned long bufsiz, struct strbuf *header) { - /* An already open pack is known to be valid. */ - if (p->pack_fd != -1) - return 1; + int status; + + status = unpack_sha1_short_header(stream, map, mapsize, buffer, bufsiz); + if (status < Z_OK) + return -1; - /* If the pack has one window completely covering the - * file size, the pack is known to be valid even if - * the descriptor is not currently open. + /* + * Check if entire header is unpacked in the first iteration. */ - if (p->windows) { - struct pack_window *w = p->windows; + if (memchr(buffer, '\0', stream->next_out - (unsigned char *)buffer)) + return 0; - if (!w->offset && w->len == p->pack_size) - return 1; - } + /* + * buffer[0..bufsiz] was not large enough. Copy the partial + * result out to header, and then append the result of further + * reading the stream. + */ + strbuf_add(header, buffer, stream->next_out - (unsigned char *)buffer); + stream->next_out = buffer; + stream->avail_out = bufsiz; - /* Force the pack to open to prove its valid. */ - return !open_packed_git(p); + do { + status = git_inflate(stream, 0); + strbuf_add(header, buffer, stream->next_out - (unsigned char *)buffer); + if (memchr(buffer, '\0', stream->next_out - (unsigned char *)buffer)) + return 0; + stream->next_out = buffer; + stream->avail_out = bufsiz; + } while (status != Z_STREAM_END); + return -1; } - static int fill_pack_entry(const unsigned char *sha1, - struct pack_entry *e, - struct packed_git *p) + static void *unpack_sha1_rest(git_zstream *stream, void *buffer, unsigned long size, const unsigned char *sha1) { - off_t offset; + int bytes = strlen(buffer) + 1; + unsigned char *buf = xmallocz(size); + unsigned long n; + int status = Z_OK; - if (p->num_bad_objects) { - unsigned i; - for (i = 0; i < p->num_bad_objects; i++) - if (!hashcmp(sha1, p->bad_object_sha1 + 20 * i)) - return 0; + n = stream->total_out - bytes; + if (n > size) + n = size; + memcpy(buf, (char *) buffer + bytes, n); + bytes = n; + if (bytes <= size) { + /* + * The above condition must be (bytes <= size), not + * (bytes < size). In other words, even though we + * expect no more output and set avail_out to zero, + * the input zlib stream may have bytes that express + * "this concludes the stream", and we *do* want to + * eat that input. + * + * Otherwise we would not be able to test that we + * consumed all the input to reach the expected size; + * we also want to check that zlib tells us that all + * went well with status == Z_STREAM_END at the end. + */ + stream->next_out = buf + bytes; + stream->avail_out = size - bytes; + while (status == Z_OK) + status = git_inflate(stream, Z_FINISH); + } + if (status == Z_STREAM_END && !stream->avail_in) { + git_inflate_end(stream); + return buf; } - offset = find_pack_entry_one(sha1, p); - if (!offset) - return 0; - - /* - * We are about to tell the caller where they can locate the - * requested object. We better make sure the packfile is - * still here and can be accessed before supplying that - * answer, as it may have been deleted since the index was - * loaded! - */ - if (!is_pack_valid(p)) - return 0; - e->offset = offset; - e->p = p; - hashcpy(e->sha1, sha1); - return 1; + if (status < 0) + error("corrupt loose object '%s'", sha1_to_hex(sha1)); + else if (stream->avail_in) + error("garbage at end of loose object '%s'", + sha1_to_hex(sha1)); + free(buf); + return NULL; } /* - * Iff a pack file contains the object named by sha1, return true and - * store its location to e. + * We used to just use "sscanf()", but that's actually way + * too permissive for what we want to check. So do an anal + * object header parse by hand. */ - static int find_pack_entry(const unsigned char *sha1, struct pack_entry *e) + static int parse_sha1_header_extended(const char *hdr, struct object_info *oi, + unsigned int flags) { - struct mru_entry *p; + const char *type_buf = hdr; + unsigned long size; + int type, type_len = 0; - prepare_packed_git(); - if (!packed_git) - return 0; + /* + * The type can be of any size but is followed by + * a space. + */ + for (;;) { + char c = *hdr++; + if (!c) + return -1; + if (c == ' ') + break; + type_len++; + } - for (p = packed_git_mru->head; p; p = p->next) { - if (fill_pack_entry(sha1, e, p->item)) { - mru_mark(packed_git_mru, p); - return 1; + type = type_from_string_gently(type_buf, type_len, 1); + if (oi->typename) + strbuf_add(oi->typename, type_buf, type_len); + /* + * Set type to 0 if its an unknown object and + * we're obtaining the type using '--allow-unknown-type' + * option. + */ + if ((flags & OBJECT_INFO_ALLOW_UNKNOWN_TYPE) && (type < 0)) + type = 0; + else if (type < 0) + die("invalid object type"); + if (oi->typep) + *oi->typep = type; + + /* + * The length must follow immediately, and be in canonical + * decimal format (ie "010" is not valid). + */ + size = *hdr++ - '0'; + if (size > 9) + return -1; + if (size) { + for (;;) { + unsigned long c = *hdr - '0'; + if (c > 9) + break; + hdr++; + size = size * 10 + c; } } - return 0; + + if (oi->sizep) + *oi->sizep = size; + + /* + * The length must be followed by a zero byte + */ + return *hdr ? -1 : type; } - struct packed_git *find_sha1_pack(const unsigned char *sha1, - struct packed_git *packs) + int parse_sha1_header(const char *hdr, unsigned long *sizep) { - struct packed_git *p; - - for (p = packs; p; p = p->next) { - if (find_pack_entry_one(sha1, p)) - return p; - } - return NULL; + struct object_info oi = OBJECT_INFO_INIT; + oi.sizep = sizep; + return parse_sha1_header_extended(hdr, &oi, 0); } static int sha1_loose_object_info(const unsigned char *sha1, @@@ -3053,6 -1231,20 +1231,20 @@@ int sha1_object_info(const unsigned cha return type; } + static void *read_object(const unsigned char *sha1, enum object_type *type, + unsigned long *size) + { + struct object_info oi = OBJECT_INFO_INIT; + void *content; + oi.typep = type; + oi.sizep = size; + oi.contentp = &content; + + if (sha1_object_info_extended(sha1, &oi, 0) < 0) + return NULL; + return content; + } + int pretend_sha1_file(void *buf, unsigned long len, enum object_type type, unsigned char *sha1) { @@@ -3071,20 -1263,6 +1263,6 @@@ return 0; } - static void *read_object(const unsigned char *sha1, enum object_type *type, - unsigned long *size) - { - struct object_info oi = OBJECT_INFO_INIT; - void *content; - oi.typep = type; - oi.sizep = size; - oi.contentp = &content; - - if (sha1_object_info_extended(sha1, &oi, 0) < 0) - return NULL; - return content; - } - /* * This function dies on corrupt objects; the callers who want to * deal with them should arrange to call read_object() and give error @@@ -3403,7 -1581,7 +1581,7 @@@ int write_sha1_file(const void *buf, un } int hash_sha1_file_literally(const void *buf, unsigned long len, const char *type, - unsigned char *sha1, unsigned flags) + struct object_id *oid, unsigned flags) { char *header; int hdrlen, status = 0; @@@ -3411,13 -1589,13 +1589,13 @@@ /* type string, SP, %lu of the length plus NUL must fit this */ hdrlen = strlen(type) + 32; header = xmalloc(hdrlen); - write_sha1_file_prepare(buf, len, type, sha1, header, &hdrlen); + write_sha1_file_prepare(buf, len, type, oid->hash, header, &hdrlen); if (!(flags & HASH_WRITE_OBJECT)) goto cleanup; - if (freshen_packed_object(sha1) || freshen_loose_object(sha1)) + if (freshen_packed_object(oid->hash) || freshen_loose_object(oid->hash)) goto cleanup; - status = write_loose_object(sha1, header, hdrlen, buf, len, 0); + status = write_loose_object(oid->hash, header, hdrlen, buf, len, 0); cleanup: free(header); @@@ -3445,20 -1623,6 +1623,6 @@@ int force_object_loose(const unsigned c return ret; } - int has_pack_index(const unsigned char *sha1) - { - struct stat st; - if (stat(sha1_pack_index_name(sha1), &st)) - return 0; - return 1; - } - - int has_sha1_pack(const unsigned char *sha1) - { - struct pack_entry e; - return find_pack_entry(sha1, &e); - } - int has_sha1_file_with_flags(const unsigned char *sha1, int flags) { if (!startup_info->have_repository) @@@ -3621,14 -1785,14 +1785,14 @@@ static int index_core(unsigned char *sh * binary blobs, they generally do not want to get any conversion, and * callers should avoid this code path when filters are requested. */ -static int index_stream(unsigned char *sha1, int fd, size_t size, +static int index_stream(struct object_id *oid, int fd, size_t size, enum object_type type, const char *path, unsigned flags) { - return index_bulk_checkin(sha1, fd, size, type, path, flags); + return index_bulk_checkin(oid->hash, fd, size, type, path, flags); } -int index_fd(unsigned char *sha1, int fd, struct stat *st, +int index_fd(struct object_id *oid, int fd, struct stat *st, enum object_type type, const char *path, unsigned flags) { int ret; @@@ -3638,21 -1802,21 +1802,21 @@@ * die() for large files. */ if (type == OBJ_BLOB && path && would_convert_to_git_filter_fd(path)) - ret = index_stream_convert_blob(sha1, fd, path, flags); + ret = index_stream_convert_blob(oid->hash, fd, path, flags); else if (!S_ISREG(st->st_mode)) - ret = index_pipe(sha1, fd, type, path, flags); + ret = index_pipe(oid->hash, fd, type, path, flags); else if (st->st_size <= big_file_threshold || type != OBJ_BLOB || (path && would_convert_to_git(&the_index, path))) - ret = index_core(sha1, fd, xsize_t(st->st_size), type, path, + ret = index_core(oid->hash, fd, xsize_t(st->st_size), type, path, flags); else - ret = index_stream(sha1, fd, xsize_t(st->st_size), type, path, + ret = index_stream(oid, fd, xsize_t(st->st_size), type, path, flags); close(fd); return ret; } -int index_path(unsigned char *sha1, const char *path, struct stat *st, unsigned flags) +int index_path(struct object_id *oid, const char *path, struct stat *st, unsigned flags) { int fd; struct strbuf sb = STRBUF_INIT; @@@ -3662,7 -1826,7 +1826,7 @@@ fd = open(path, O_RDONLY); if (fd < 0) return error_errno("open(\"%s\")", path); - if (index_fd(sha1, fd, st, OBJ_BLOB, path, flags) < 0) + if (index_fd(oid, fd, st, OBJ_BLOB, path, flags) < 0) return error("%s: failed to insert into database", path); break; @@@ -3670,14 -1834,14 +1834,14 @@@ if (strbuf_readlink(&sb, path, st->st_size)) return error_errno("readlink(\"%s\")", path); if (!(flags & HASH_WRITE_OBJECT)) - hash_sha1_file(sb.buf, sb.len, blob_type, sha1); - else if (write_sha1_file(sb.buf, sb.len, blob_type, sha1)) + hash_sha1_file(sb.buf, sb.len, blob_type, oid->hash); + else if (write_sha1_file(sb.buf, sb.len, blob_type, oid->hash)) return error("%s: failed to insert into database", path); strbuf_release(&sb); break; case S_IFDIR: - return resolve_gitlink_ref(path, "HEAD", sha1); + return resolve_gitlink_ref(path, "HEAD", oid->hash); default: return error("%s: unsupported file type", path); } @@@ -3851,46 -2015,6 +2015,6 @@@ int for_each_loose_object(each_loose_ob return foreach_alt_odb(loose_from_alt_odb, &alt); } - static int for_each_object_in_pack(struct packed_git *p, each_packed_object_fn cb, void *data) - { - uint32_t i; - int r = 0; - - for (i = 0; i < p->num_objects; i++) { - struct object_id oid; - - if (!nth_packed_object_oid(&oid, p, i)) - return error("unable to get sha1 of object %u in %s", - i, p->pack_name); - - r = cb(&oid, p, i, data); - if (r) - break; - } - return r; - } - - int for_each_packed_object(each_packed_object_fn cb, void *data, unsigned flags) - { - struct packed_git *p; - int r = 0; - int pack_errors = 0; - - prepare_packed_git(); - for (p = packed_git; p; p = p->next) { - if ((flags & FOR_EACH_OBJECT_LOCAL_ONLY) && !p->pack_local) - continue; - if (open_pack_index(p)) { - pack_errors = 1; - continue; - } - r = for_each_object_in_pack(p, cb, data); - if (r) - break; - } - return r ? r : pack_errors; - } - static int check_stream_sha1(git_zstream *stream, const char *hdr, unsigned long size,