From: Junio C Hamano Date: Thu, 29 Sep 2016 23:49:44 +0000 (-0700) Subject: Merge branch 'js/regexec-buf' into maint X-Git-Tag: v2.10.1~9 X-Git-Url: https://git.lorimer.id.au/gitweb.git/diff_plain/300e95f7df240a0f6efea09d5e21fcc350e5ce83?hp=-c Merge branch 'js/regexec-buf' into maint Some codepaths in "git diff" used regexec(3) on a buffer that was mmap(2)ed, which may not have a terminating NUL, leading to a read beyond the end of the mapped region. This was fixed by introducing a regexec_buf() helper that takes a pair with REG_STARTEND extension. * js/regexec-buf: regex: use regexec_buf() regex: add regexec_buf() that can work on a non NUL-terminated string regex: -G feeds a non NUL-terminated string to regexec() and fails --- 300e95f7df240a0f6efea09d5e21fcc350e5ce83 diff --combined Makefile index 7f184923dd,c6dc42d067..7a36af8665 --- a/Makefile +++ b/Makefile @@@ -296,12 -296,8 +296,13 @@@ all: # Define USE_NED_ALLOCATOR if you want to replace the platforms default # memory allocators with the nedmalloc allocator written by Niall Douglas. # +# Define OVERRIDE_STRDUP to override the libc version of strdup(3). +# This is necessary when using a custom allocator in order to avoid +# crashes due to allocation and free working on different 'heaps'. +# It's defined automatically if USE_NED_ALLOCATOR is set. +# - # Define NO_REGEX if you have no or inferior regex support in your C library. + # Define NO_REGEX if your C library lacks regex support with REG_STARTEND + # feature. # # Define HAVE_DEV_TTY if your system can open /dev/tty to interact with the # user. @@@ -356,12 -352,9 +357,12 @@@ # Define GMTIME_UNRELIABLE_ERRORS if your gmtime() function does not # return NULL when it receives a bogus time_t. # -# Define HAVE_CLOCK_GETTIME if your platform has clock_gettime in librt. +# Define HAVE_CLOCK_GETTIME if your platform has clock_gettime. +# +# Define HAVE_CLOCK_MONOTONIC if your platform has CLOCK_MONOTONIC. # -# Define HAVE_CLOCK_MONOTONIC if your platform has CLOCK_MONOTONIC in librt. +# Define NEEDS_LIBRT if your platform requires linking with librt (glibc version +# before 2.17) for clock_gettime and CLOCK_MONOTONIC. # # Define USE_PARENS_AROUND_GETTEXT_N to "yes" if your compiler happily # compiles the following initialization: @@@ -375,14 -368,6 +376,14 @@@ # Define HAVE_BSD_SYSCTL if your platform has a BSD-compatible sysctl function. # # Define HAVE_GETDELIM if your system has the getdelim() function. +# +# Define PAGER_ENV to a SP separated VAR=VAL pairs to define +# default environment variables to be passed when a pager is spawned, e.g. +# +# PAGER_ENV = LESS=FRX LV=-c +# +# to say "export LESS=FRX (and LV=-c) if the environment variable +# LESS (and LV) is not set, respectively". GIT-VERSION-FILE: FORCE @$(SHELL_PATH) ./GIT-VERSION-GEN @@@ -391,7 -376,13 +392,7 @@@ # CFLAGS and LDFLAGS are for the users to override from the command line. CFLAGS = -g -O2 -Wall -LDFLAGS = -ALL_CFLAGS = $(CPPFLAGS) $(CFLAGS) -ALL_LDFLAGS = $(LDFLAGS) -STRIP ?= strip - -ifdef DEVELOPER -CFLAGS += -Werror \ +DEVELOPER_CFLAGS = -Werror \ -Wdeclaration-after-statement \ -Wno-format-zero-length \ -Wold-style-definition \ @@@ -400,10 -391,7 +401,10 @@@ -Wstrict-prototypes \ -Wunused \ -Wvla -endif +LDFLAGS = +ALL_CFLAGS = $(CPPFLAGS) $(CFLAGS) +ALL_LDFLAGS = $(LDFLAGS) +STRIP ?= strip # Create as necessary, replace existing, make ranlib unneeded. ARFLAGS = rcs @@@ -453,6 -441,7 +454,6 @@@ DIFF = dif TAR = tar FIND = find INSTALL = install -RPMBUILD = rpmbuild TCL_PATH = tclsh TCLTK_PATH = wish XGETTEXT = xgettext @@@ -633,7 -622,7 +634,7 @@@ TEST_PROGRAMS_NEED_X += test-svn-f TEST_PROGRAMS_NEED_X += test-urlmatch-normalization TEST_PROGRAMS_NEED_X += test-wildmatch -TEST_PROGRAMS = $(patsubst %,%$X,$(TEST_PROGRAMS_NEED_X)) +TEST_PROGRAMS = $(patsubst %,t/helper/%$X,$(TEST_PROGRAMS_NEED_X)) # List built-in command $C whose implementation cmd_$C() is not in # builtin/$C.o but is linked in as part of some other command. @@@ -734,7 -723,6 +735,7 @@@ LIB_OBJS += diff-lib. LIB_OBJS += diff-no-index.o LIB_OBJS += diff.o LIB_OBJS += dir.o +LIB_OBJS += dir-iterator.o LIB_OBJS += editor.o LIB_OBJS += entry.o LIB_OBJS += environment.o @@@ -768,7 -756,6 +769,7 @@@ LIB_OBJS += merge. LIB_OBJS += merge-blobs.o LIB_OBJS += merge-recursive.o LIB_OBJS += mergesort.o +LIB_OBJS += mru.o LIB_OBJS += name-hash.o LIB_OBJS += notes.o LIB_OBJS += notes-cache.o @@@ -800,7 -787,6 +801,7 @@@ LIB_OBJS += read-cache. LIB_OBJS += reflog-walk.o LIB_OBJS += refs.o LIB_OBJS += refs/files-backend.o +LIB_OBJS += refs/iterator.o LIB_OBJS += ref-filter.o LIB_OBJS += remote.o LIB_OBJS += replace_object.o @@@ -958,7 -944,7 +959,7 @@@ BUILTIN_OBJS += builtin/verify-tag. BUILTIN_OBJS += builtin/worktree.o BUILTIN_OBJS += builtin/write-tree.o -GITLIBS = $(LIB_FILE) $(XDIFF_LIB) +GITLIBS = common-main.o $(LIB_FILE) $(XDIFF_LIB) EXTLIBS = GIT_USER_AGENT = git/$(GIT_VERSION) @@@ -967,10 -953,6 +968,10 @@@ include config.mak.unam -include config.mak.autogen -include config.mak +ifdef DEVELOPER +CFLAGS += $(DEVELOPER_CFLAGS) +endif + ifndef sysconfdir ifeq ($(prefix),/usr) sysconfdir = /etc @@@ -1461,14 -1443,8 +1462,14 @@@ ifdef NATIVE_CRL endif ifdef USE_NED_ALLOCATOR - COMPAT_CFLAGS += -Icompat/nedmalloc - COMPAT_OBJS += compat/nedmalloc/nedmalloc.o + COMPAT_CFLAGS += -Icompat/nedmalloc + COMPAT_OBJS += compat/nedmalloc/nedmalloc.o + OVERRIDE_STRDUP = YesPlease +endif + +ifdef OVERRIDE_STRDUP + COMPAT_CFLAGS += -DOVERRIDE_STRDUP + COMPAT_OBJS += compat/strdup.o endif ifdef GIT_TEST_CMP_USE_COPIED_CONTEXT @@@ -1490,16 -1466,13 +1491,16 @@@ endi ifdef HAVE_CLOCK_GETTIME BASIC_CFLAGS += -DHAVE_CLOCK_GETTIME - EXTLIBS += -lrt endif ifdef HAVE_CLOCK_MONOTONIC BASIC_CFLAGS += -DHAVE_CLOCK_MONOTONIC endif +ifdef NEEDS_LIBRT + EXTLIBS += -lrt +endif + ifdef HAVE_BSD_SYSCTL BASIC_CFLAGS += -DHAVE_BSD_SYSCTL endif @@@ -1520,10 -1493,6 +1521,10 @@@ ifeq ($(PYTHON_PATH), NO_PYTHON = NoThanks endif +ifndef PAGER_ENV +PAGER_ENV = LESS=FRX LV=-c +endif + QUIET_SUBDIR0 = +$(MAKE) -C # space to separate -C and subdir QUIET_SUBDIR1 = @@@ -1604,15 -1573,7 +1605,15 @@@ TCLTK_PATH_SQ = $(subst ','\'',$(TCLTK_ DIFF_SQ = $(subst ','\'',$(DIFF)) PERLLIB_EXTRA_SQ = $(subst ','\'',$(PERLLIB_EXTRA)) -LIBS = $(GITLIBS) $(EXTLIBS) +# We must filter out any object files from $(GITLIBS), +# as it is typically used like: +# +# foo: foo.o $(GITLIBS) +# $(CC) $(filter %.o,$^) $(LIBS) +# +# where we use it as a dependency. Since we also pull object files +# from the dependency list, that would make each entry appear twice. +LIBS = $(filter-out %.o, $(GITLIBS)) $(EXTLIBS) BASIC_CFLAGS += -DSHA1_HEADER='$(SHA1_HEADER_SQ)' \ $(COMPAT_CFLAGS) @@@ -1653,11 -1614,6 +1654,11 @@@ ifdef DEFAULT_HELP_FORMA BASIC_CFLAGS += -DDEFAULT_HELP_FORMAT='"$(DEFAULT_HELP_FORMAT)"' endif +PAGER_ENV_SQ = $(subst ','\'',$(PAGER_ENV)) +PAGER_ENV_CQ = "$(subst ",\",$(subst \,\\,$(PAGER_ENV)))" +PAGER_ENV_CQ_SQ = $(subst ','\'',$(PAGER_ENV_CQ)) +BASIC_CFLAGS += -DPAGER_ENV='$(PAGER_ENV_CQ_SQ)' + ALL_CFLAGS += $(BASIC_CFLAGS) ALL_LDFLAGS += $(BASIC_LDFLAGS) @@@ -1753,8 -1709,8 +1754,8 @@@ git.sp git.s git.o: EXTRA_CPPFLAGS = '-DGIT_INFO_PATH="$(infodir_relative_SQ)"' git$X: git.o GIT-LDFLAGS $(BUILTIN_OBJS) $(GITLIBS) - $(QUIET_LINK)$(CC) $(ALL_CFLAGS) -o $@ $(ALL_LDFLAGS) git.o \ - $(BUILTIN_OBJS) $(LIBS) + $(QUIET_LINK)$(CC) $(ALL_CFLAGS) -o $@ $(ALL_LDFLAGS) \ + $(filter %.o,$^) $(LIBS) help.sp help.s help.o: common-cmds.h @@@ -1782,7 -1738,7 +1783,7 @@@ common-cmds.h: $(wildcard Documentation SCRIPT_DEFINES = $(SHELL_PATH_SQ):$(DIFF_SQ):$(GIT_VERSION):\ $(localedir_SQ):$(NO_CURL):$(USE_GETTEXT_SCHEME):$(SANE_TOOL_PATH_SQ):\ - $(gitwebdir_SQ):$(PERL_PATH_SQ):$(SANE_TEXT_GREP) + $(gitwebdir_SQ):$(PERL_PATH_SQ):$(SANE_TEXT_GREP):$(PAGER_ENV) define cmd_munge_script $(RM) $@ $@+ && \ sed -e '1s|#!.*/sh|#!$(SHELL_PATH_SQ)|' \ @@@ -1795,7 -1751,6 +1796,7 @@@ -e 's|@@GITWEBDIR@@|$(gitwebdir_SQ)|g' \ -e 's|@@PERL@@|$(PERL_PATH_SQ)|g' \ -e 's|@@SANE_TEXT_GREP@@|$(SANE_TEXT_GREP)|g' \ + -e 's|@@PAGER_ENV@@|$(PAGER_ENV_SQ)|g' \ $@.sh >$@+ endef @@@ -1944,11 -1899,10 +1945,11 @@@ VCSSVN_OBJS += vcs-svn/fast_export. VCSSVN_OBJS += vcs-svn/svndiff.o VCSSVN_OBJS += vcs-svn/svndump.o -TEST_OBJS := $(patsubst test-%$X,test-%.o,$(TEST_PROGRAMS)) +TEST_OBJS := $(patsubst %$X,%.o,$(TEST_PROGRAMS)) OBJECTS := $(LIB_OBJS) $(BUILTIN_OBJS) $(PROGRAM_OBJS) $(TEST_OBJS) \ $(XDIFF_OBJS) \ $(VCSSVN_OBJS) \ + common-main.o \ git.o ifndef NO_CURL OBJECTS += http.o http-walker.o remote-curl.o @@@ -2040,7 -1994,7 +2041,7 @@@ endi ifdef USE_NED_ALLOCATOR compat/nedmalloc/nedmalloc.sp compat/nedmalloc/nedmalloc.o: EXTRA_CPPFLAGS = \ - -DNDEBUG -DOVERRIDE_STRDUP -DREPLACE_SYSTEM_ALLOCATOR + -DNDEBUG -DREPLACE_SYSTEM_ALLOCATOR compat/nedmalloc/nedmalloc.sp: SPARSE_FLAGS += -Wno-non-pointer-null endif @@@ -2110,10 -2064,7 +2111,10 @@@ XGETTEXT_FLAGS_SH = $(XGETTEXT_FLAGS) - --keyword=gettextln --keyword=eval_gettextln XGETTEXT_FLAGS_PERL = $(XGETTEXT_FLAGS) --keyword=__ --language=Perl LOCALIZED_C = $(C_OBJ:o=c) $(LIB_H) $(GENERATED_H) -LOCALIZED_SH = $(SCRIPT_SH) git-parse-remote.sh +LOCALIZED_SH = $(SCRIPT_SH) +LOCALIZED_SH += git-parse-remote.sh +LOCALIZED_SH += git-rebase--interactive.sh +LOCALIZED_SH += git-sh-setup.sh LOCALIZED_PERL = $(SCRIPT_PERL) ifdef XGETTEXT_INCLUDE_TESTS @@@ -2203,7 -2154,6 +2204,7 @@@ GIT-BUILD-OPTIONS: FORC @echo NO_PERL=\''$(subst ','\'',$(subst ','\'',$(NO_PERL)))'\' >>$@+ @echo NO_PYTHON=\''$(subst ','\'',$(subst ','\'',$(NO_PYTHON)))'\' >>$@+ @echo NO_UNIX_SOCKETS=\''$(subst ','\'',$(subst ','\'',$(NO_UNIX_SOCKETS)))'\' >>$@+ + @echo PAGER_ENV=\''$(subst ','\'',$(subst ','\'',$(PAGER_ENV)))'\' >>$@+ ifdef TEST_OUTPUT_DIRECTORY @echo TEST_OUTPUT_DIRECTORY=\''$(subst ','\'',$(subst ','\'',$(TEST_OUTPUT_DIRECTORY)))'\' >>$@+ endif @@@ -2256,7 -2206,7 +2257,7 @@@ bin-wrappers/%: wrap-for-bin.s @mkdir -p bin-wrappers $(QUIET_GEN)sed -e '1s|#!.*/sh|#!$(SHELL_PATH_SQ)|' \ -e 's|@@BUILD_DIR@@|$(shell pwd)|' \ - -e 's|@@PROG@@|$(@F)|' < $< > $@ && \ + -e 's|@@PROG@@|$(patsubst test-%,t/helper/test-%,$(@F))|' < $< > $@ && \ chmod +x $@ # GNU make supports exporting all variables by "export" without parameters. @@@ -2276,17 -2226,25 +2277,17 @@@ perf: al .PHONY: test perf -test-ctype$X: ctype.o - -test-date$X: date.o ctype.o - -test-delta$X: diff-delta.o patch-delta.o +t/helper/test-line-buffer$X: $(VCSSVN_LIB) -test-line-buffer$X: vcs-svn/lib.a - -test-parse-options$X: parse-options.o parse-options-cb.o - -test-svn-fe$X: vcs-svn/lib.a +t/helper/test-svn-fe$X: $(VCSSVN_LIB) .PRECIOUS: $(TEST_OBJS) -test-%$X: test-%.o GIT-LDFLAGS $(GITLIBS) +t/helper/test-%$X: t/helper/test-%.o GIT-LDFLAGS $(GITLIBS) $(QUIET_LINK)$(CC) $(ALL_CFLAGS) -o $@ $(ALL_LDFLAGS) $(filter %.o,$^) $(filter %.a,$^) $(LIBS) -check-sha1:: test-sha1$X - ./test-sha1.sh +check-sha1:: t/helper/test-sha1$X + t/helper/test-sha1.sh SP_OBJ = $(patsubst %.o,%.sp,$(C_OBJ)) @@@ -2433,25 -2391,31 +2434,25 @@@ quick-install-html ### Maintainer's dist rules -git.spec: git.spec.in GIT-VERSION-FILE - sed -e 's/@@VERSION@@/$(GIT_VERSION)/g' < $< > $@+ - mv $@+ $@ - GIT_TARNAME = git-$(GIT_VERSION) -dist: git.spec git-archive$(X) configure +dist: git-archive$(X) configure ./git-archive --format=tar \ --prefix=$(GIT_TARNAME)/ HEAD^{tree} > $(GIT_TARNAME).tar @mkdir -p $(GIT_TARNAME) - @cp git.spec configure $(GIT_TARNAME) + @cp configure $(GIT_TARNAME) @echo $(GIT_VERSION) > $(GIT_TARNAME)/version @$(MAKE) -C git-gui TARDIR=../$(GIT_TARNAME)/git-gui dist-version $(TAR) rf $(GIT_TARNAME).tar \ - $(GIT_TARNAME)/git.spec \ $(GIT_TARNAME)/configure \ $(GIT_TARNAME)/version \ $(GIT_TARNAME)/git-gui/version @$(RM) -r $(GIT_TARNAME) gzip -f -9 $(GIT_TARNAME).tar -rpm: dist - $(RPMBUILD) \ - --define "_source_filedigest_algorithm md5" \ - --define "_binary_filedigest_algorithm md5" \ - -ta $(GIT_TARNAME).tar.gz +rpm:: + @echo >&2 "Use distro packaged sources to run rpmbuild" + @false +.PHONY: rpm htmldocs = git-htmldocs-$(GIT_VERSION) manpages = git-manpages-$(GIT_VERSION) @@@ -2487,8 -2451,8 +2488,8 @@@ profile-clean $(RM) $(addsuffix *.gcno,$(addprefix $(PROFILE_DIR)/, $(object_dirs))) clean: profile-clean coverage-clean - $(RM) *.o *.res refs/*.o block-sha1/*.o ppc/*.o compat/*.o compat/*/*.o - $(RM) xdiff/*.o vcs-svn/*.o ewah/*.o builtin/*.o + $(RM) *.res + $(RM) $(OBJECTS) $(RM) $(LIB_FILE) $(XDIFF_LIB) $(VCSSVN_LIB) $(RM) $(ALL_PROGRAMS) $(SCRIPT_LIB) $(BUILT_INS) git$X $(RM) $(TEST_PROGRAMS) $(NO_INSTALL) @@@ -2527,7 -2491,6 +2528,7 @@@ ALL_COMMANDS += git-gui git-citoo .PHONY: check-docs check-docs:: + $(MAKE) -C Documentation lint-docs @(for v in $(ALL_COMMANDS); \ do \ case "$$v" in \ diff --combined diff.c index cc8e81290a,f77324e9e0..7a4309397c --- a/diff.c +++ b/diff.c @@@ -26,7 -26,6 +26,7 @@@ #endif static int diff_detect_rename_default; +static int diff_compaction_heuristic; /* experimental */ static int diff_rename_limit_default = 400; static int diff_suppress_blank_empty; static int diff_use_color_default = -1; @@@ -169,11 -168,6 +169,11 @@@ long parse_algorithm_value(const char * * never be affected by the setting of diff.renames * the user happens to have in the configuration file. */ +void init_diff_ui_defaults(void) +{ + diff_detect_rename_default = 1; +} + int git_diff_ui_config(const char *var, const char *value, void *cb) { if (!strcmp(var, "diff.color") || !strcmp(var, "color.diff")) { @@@ -190,10 -184,6 +190,10 @@@ diff_detect_rename_default = git_config_rename(var, value); return 0; } + if (!strcmp(var, "diff.compactionheuristic")) { + diff_compaction_heuristic = git_config_bool(var, value); + return 0; + } if (!strcmp(var, "diff.autorefreshindex")) { diff_auto_refresh_index = git_config_bool(var, value); return 0; @@@ -354,6 -344,7 +354,6 @@@ struct emit_callback const char **label_path; struct diff_words_data *diff_words; struct diff_options *opt; - int *found_changesp; struct strbuf *header; }; @@@ -721,6 -712,7 +721,6 @@@ static void emit_rewrite_diff(const cha memset(&ecbdata, 0, sizeof(ecbdata)); ecbdata.color_diff = want_color(o->use_color); - ecbdata.found_changesp = &o->found_changes; ecbdata.ws_rule = whitespace_rule(name_b); ecbdata.opt = o; if (ecbdata.ws_rule & WS_BLANK_AT_EOF) { @@@ -949,7 -941,8 +949,8 @@@ static int find_word_boundaries(mmfile_ { if (word_regex && *begin < buffer->size) { regmatch_t match[1]; - if (!regexec(word_regex, buffer->ptr + *begin, 1, match, 0)) { + if (!regexec_buf(word_regex, buffer->ptr + *begin, + buffer->size - *begin, 1, match, 0)) { char *p = memchr(buffer->ptr + *begin + match[0].rm_so, '\n', match[0].rm_eo - match[0].rm_so); *end = p ? p - buffer->ptr : match[0].rm_eo + *begin; @@@ -1214,13 -1207,12 +1215,13 @@@ static void fn_out_consume(void *priv, struct diff_options *o = ecbdata->opt; const char *line_prefix = diff_line_prefix(o); + o->found_changes = 1; + if (ecbdata->header) { - fprintf(ecbdata->opt->file, "%s", ecbdata->header->buf); + fprintf(o->file, "%s", ecbdata->header->buf); strbuf_reset(ecbdata->header); ecbdata->header = NULL; } - *(ecbdata->found_changesp) = 1; if (ecbdata->label_path[0]) { const char *name_a_tab, *name_b_tab; @@@ -1228,9 -1220,9 +1229,9 @@@ name_a_tab = strchr(ecbdata->label_path[0], ' ') ? "\t" : ""; name_b_tab = strchr(ecbdata->label_path[1], ' ') ? "\t" : ""; - fprintf(ecbdata->opt->file, "%s%s--- %s%s%s\n", + fprintf(o->file, "%s%s--- %s%s%s\n", line_prefix, meta, ecbdata->label_path[0], reset, name_a_tab); - fprintf(ecbdata->opt->file, "%s%s+++ %s%s%s\n", + fprintf(o->file, "%s%s+++ %s%s%s\n", line_prefix, meta, ecbdata->label_path[1], reset, name_b_tab); ecbdata->label_path[0] = ecbdata->label_path[1] = NULL; } @@@ -1248,7 -1240,15 +1249,7 @@@ find_lno(line, ecbdata); emit_hunk_header(ecbdata, line, len); if (line[len-1] != '\n') - putc('\n', ecbdata->opt->file); - return; - } - - if (len < 1) { - emit_line(ecbdata->opt, reset, reset, line, len); - if (ecbdata->diff_words - && ecbdata->diff_words->type == DIFF_WORDS_PORCELAIN) - fputs("~\n", ecbdata->opt->file); + putc('\n', o->file); return; } @@@ -1273,8 -1273,8 +1274,8 @@@ } diff_words_flush(ecbdata); if (ecbdata->diff_words->type == DIFF_WORDS_PORCELAIN) { - emit_line(ecbdata->opt, context, reset, line, len); - fputs("~\n", ecbdata->opt->file); + emit_line(o, context, reset, line, len); + fputs("~\n", o->file); } else { /* * Skip the prefix character, if any. With @@@ -1285,7 -1285,7 +1286,7 @@@ line++; len--; } - emit_line(ecbdata->opt, context, reset, line, len); + emit_line(o, context, reset, line, len); } return; } @@@ -1307,7 -1307,8 +1308,7 @@@ default: /* incomplete line at the end */ ecbdata->lno_in_preimage++; - emit_line(ecbdata->opt, - diff_get_color(ecbdata->color_diff, DIFF_CONTEXT), + emit_line(o, diff_get_color(ecbdata->color_diff, DIFF_CONTEXT), reset, line, len); break; } @@@ -1923,8 -1924,8 +1924,8 @@@ static void show_dirstat(struct diff_op name = p->two->path ? p->two->path : p->one->path; - if (p->one->sha1_valid && p->two->sha1_valid) - content_changed = hashcmp(p->one->sha1, p->two->sha1); + if (p->one->oid_valid && p->two->oid_valid) + content_changed = oidcmp(&p->one->oid, &p->two->oid); else content_changed = 1; @@@ -2296,8 -2297,7 +2297,8 @@@ static void builtin_diff(const char *na const char *add = diff_get_color_opt(o, DIFF_FILE_NEW); show_submodule_summary(o->file, one->path ? one->path : two->path, line_prefix, - one->sha1, two->sha1, two->dirty_submodule, + one->oid.hash, two->oid.hash, + two->dirty_submodule, meta, del, add, reset); return; } @@@ -2375,7 -2375,7 +2376,7 @@@ if (!one->data && !two->data && S_ISREG(one->mode) && S_ISREG(two->mode) && !DIFF_OPT_TST(o, BINARY)) { - if (!hashcmp(one->sha1, two->sha1)) { + if (!oidcmp(&one->oid, &two->oid)) { if (must_show_header) fprintf(o->file, "%s", header.buf); goto free_ab_and_return; @@@ -2428,6 -2428,7 +2429,6 @@@ memset(&ecbdata, 0, sizeof(ecbdata)); ecbdata.label_path = lbl; ecbdata.color_diff = want_color(o->use_color); - ecbdata.found_changesp = &o->found_changes; ecbdata.ws_rule = whitespace_rule(name_b); if (ecbdata.ws_rule & WS_BLANK_AT_EOF) check_blank_at_eof(&mf1, &mf2, &ecbdata); @@@ -2495,7 -2496,7 +2496,7 @@@ static void builtin_diffstat(const cha return; } - same_contents = !hashcmp(one->sha1, two->sha1); + same_contents = !oidcmp(&one->oid, &two->oid); if (diff_filespec_is_binary(one) || diff_filespec_is_binary(two)) { data->is_binary = 1; @@@ -2628,8 -2629,8 +2629,8 @@@ void fill_filespec(struct diff_filespe { if (mode) { spec->mode = canon_mode(mode); - hashcpy(spec->sha1, sha1); - spec->sha1_valid = sha1_valid; + hashcpy(spec->oid.hash, sha1); + spec->oid_valid = sha1_valid; } } @@@ -2672,13 -2673,6 +2673,13 @@@ static int reuse_worktree_file(const ch if (!FAST_WORKING_DIRECTORY && !want_file && has_sha1_pack(sha1)) return 0; + /* + * Similarly, if we'd have to convert the file contents anyway, that + * makes the optimization not worthwhile. + */ + if (!want_file && would_convert_to_git(name)) + return 0; + len = strlen(name); pos = cache_name_pos(name, len); if (pos < 0) @@@ -2718,8 -2712,7 +2719,8 @@@ static int diff_populate_gitlink(struc if (s->dirty_submodule) dirty = "-dirty"; - strbuf_addf(&buf, "Subproject commit %s%s\n", sha1_to_hex(s->sha1), dirty); + strbuf_addf(&buf, "Subproject commit %s%s\n", + oid_to_hex(&s->oid), dirty); s->size = buf.len; if (size_only) { s->data = NULL; @@@ -2762,8 -2755,8 +2763,8 @@@ int diff_populate_filespec(struct diff_ if (S_ISGITLINK(s->mode)) return diff_populate_gitlink(s, size_only); - if (!s->sha1_valid || - reuse_worktree_file(s->path, s->sha1, 0)) { + if (!s->oid_valid || + reuse_worktree_file(s->path, s->oid.hash, 0)) { struct strbuf buf = STRBUF_INIT; struct stat st; int fd; @@@ -2820,10 -2813,9 +2821,10 @@@ else { enum object_type type; if (size_only || (flags & CHECK_BINARY)) { - type = sha1_object_info(s->sha1, &s->size); + type = sha1_object_info(s->oid.hash, &s->size); if (type < 0) - die("unable to read %s", sha1_to_hex(s->sha1)); + die("unable to read %s", + oid_to_hex(&s->oid)); if (size_only) return 0; if (s->size > big_file_threshold && s->is_binary == -1) { @@@ -2831,9 -2823,9 +2832,9 @@@ return 0; } } - s->data = read_sha1_file(s->sha1, &type, &s->size); + s->data = read_sha1_file(s->oid.hash, &type, &s->size); if (!s->data) - die("unable to read %s", sha1_to_hex(s->sha1)); + die("unable to read %s", oid_to_hex(&s->oid)); s->should_free = 1; } return 0; @@@ -2862,7 -2854,7 +2863,7 @@@ void diff_free_filespec_data(struct dif static void prep_temp_blob(const char *path, struct diff_tempfile *temp, void *blob, unsigned long size, - const unsigned char *sha1, + const struct object_id *oid, int mode) { int fd; @@@ -2887,7 -2879,7 +2888,7 @@@ die_errno("unable to write temp-file"); close_tempfile(&temp->tempfile); temp->name = get_tempfile_path(&temp->tempfile); - sha1_to_hex_r(temp->hex, sha1); + oid_to_hex_r(temp->hex, oid); xsnprintf(temp->mode, sizeof(temp->mode), "%06o", mode); strbuf_release(&buf); strbuf_release(&template); @@@ -2911,8 -2903,8 +2912,8 @@@ static struct diff_tempfile *prepare_te } if (!S_ISGITLINK(one->mode) && - (!one->sha1_valid || - reuse_worktree_file(name, one->sha1, 1))) { + (!one->oid_valid || + reuse_worktree_file(name, one->oid.hash, 1))) { struct stat st; if (lstat(name, &st) < 0) { if (errno == ENOENT) @@@ -2924,19 -2916,19 +2925,19 @@@ if (strbuf_readlink(&sb, name, st.st_size) < 0) die_errno("readlink(%s)", name); prep_temp_blob(name, temp, sb.buf, sb.len, - (one->sha1_valid ? - one->sha1 : null_sha1), - (one->sha1_valid ? + (one->oid_valid ? + &one->oid : &null_oid), + (one->oid_valid ? one->mode : S_IFLNK)); strbuf_release(&sb); } else { /* we can borrow from the file in the work tree */ temp->name = name; - if (!one->sha1_valid) + if (!one->oid_valid) sha1_to_hex_r(temp->hex, null_sha1); else - sha1_to_hex_r(temp->hex, one->sha1); + sha1_to_hex_r(temp->hex, one->oid.hash); /* Even though we may sometimes borrow the * contents from the work tree, we always want * one->mode. mode is trustworthy even when @@@ -2951,7 -2943,7 +2952,7 @@@ if (diff_populate_filespec(one, 0)) die("cannot read data blob for %s", one->path); prep_temp_blob(name, temp, one->data, one->size, - one->sha1, one->mode); + &one->oid, one->mode); } return temp; } @@@ -3064,7 -3056,7 +3065,7 @@@ static void fill_metainfo(struct strbu default: *must_show_header = 0; } - if (one && two && hashcmp(one->sha1, two->sha1)) { + if (one && two && oidcmp(&one->oid, &two->oid)) { int abbrev = DIFF_OPT_TST(o, FULL_INDEX) ? 40 : DEFAULT_ABBREV; if (DIFF_OPT_TST(o, BINARY)) { @@@ -3074,8 -3066,8 +3075,8 @@@ abbrev = 40; } strbuf_addf(msg, "%s%sindex %s..", line_prefix, set, - find_unique_abbrev(one->sha1, abbrev)); - strbuf_addstr(msg, find_unique_abbrev(two->sha1, abbrev)); + find_unique_abbrev(one->oid.hash, abbrev)); + strbuf_addstr(msg, find_unique_abbrev(two->oid.hash, abbrev)); if (one->mode == two->mode) strbuf_addf(msg, " %06o", one->mode); strbuf_addf(msg, "%s\n", reset); @@@ -3130,20 -3122,20 +3131,20 @@@ static void run_diff_cmd(const char *pg static void diff_fill_sha1_info(struct diff_filespec *one) { if (DIFF_FILE_VALID(one)) { - if (!one->sha1_valid) { + if (!one->oid_valid) { struct stat st; if (one->is_stdin) { - hashcpy(one->sha1, null_sha1); + oidclr(&one->oid); return; } if (lstat(one->path, &st) < 0) die_errno("stat '%s'", one->path); - if (index_path(one->sha1, one->path, &st, 0)) + if (index_path(one->oid.hash, one->path, &st, 0)) die("cannot hash %s", one->path); } } else - hashclr(one->sha1); + oidclr(&one->oid); } static void strip_prefix(int prefix_length, const char **namep, const char **otherp) @@@ -3282,8 -3274,6 +3283,8 @@@ void diff_setup(struct diff_options *op options->use_color = diff_use_color_default; options->detect_rename = diff_detect_rename_default; options->xdl_opts |= diff_algorithm; + if (diff_compaction_heuristic) + DIFF_XDL_SET(options, COMPACTION_HEURISTIC); options->orderfile = diff_order_file_cfg; @@@ -3804,10 -3794,6 +3805,10 @@@ int diff_opt_parse(struct diff_options DIFF_XDL_SET(options, IGNORE_WHITESPACE_AT_EOL); else if (!strcmp(arg, "--ignore-blank-lines")) DIFF_XDL_SET(options, IGNORE_BLANK_LINES); + else if (!strcmp(arg, "--compaction-heuristic")) + DIFF_XDL_SET(options, COMPACTION_HEURISTIC); + else if (!strcmp(arg, "--no-compaction-heuristic")) + DIFF_XDL_CLR(options, COMPACTION_HEURISTIC); else if (!strcmp(arg, "--patience")) options->xdl_opts = DIFF_WITH_ALG(options, PATIENCE_DIFF); else if (!strcmp(arg, "--histogram")) @@@ -3976,8 -3962,6 +3977,8 @@@ if (!options->file) die_errno("Could not open '%s'", path); options->close_file = 1; + if (options->use_color != GIT_COLOR_ALWAYS) + options->use_color = GIT_COLOR_NEVER; return argcount; } else return 0; @@@ -4119,9 -4103,8 +4120,9 @@@ static void diff_flush_raw(struct diff_ fprintf(opt->file, "%s", diff_line_prefix(opt)); if (!(opt->output_format & DIFF_FORMAT_NAME_STATUS)) { fprintf(opt->file, ":%06o %06o %s ", p->one->mode, p->two->mode, - diff_unique_abbrev(p->one->sha1, opt->abbrev)); - fprintf(opt->file, "%s ", diff_unique_abbrev(p->two->sha1, opt->abbrev)); + diff_unique_abbrev(p->one->oid.hash, opt->abbrev)); + fprintf(opt->file, "%s ", + diff_unique_abbrev(p->two->oid.hash, opt->abbrev)); } if (p->score) { fprintf(opt->file, "%c%03d%c", p->status, similarity_index(p), @@@ -4170,11 -4153,11 +4171,11 @@@ int diff_unmodified_pair(struct diff_fi /* both are valid and point at the same path. that is, we are * dealing with a change. */ - if (one->sha1_valid && two->sha1_valid && - !hashcmp(one->sha1, two->sha1) && + if (one->oid_valid && two->oid_valid && + !oidcmp(&one->oid, &two->oid) && !one->dirty_submodule && !two->dirty_submodule) return 1; /* no change */ - if (!one->sha1_valid && !two->sha1_valid) + if (!one->oid_valid && !two->oid_valid) return 1; /* both look at the same file on the filesystem. */ return 0; } @@@ -4235,7 -4218,7 +4236,7 @@@ void diff_debug_filespec(struct diff_fi s->path, DIFF_FILE_VALID(s) ? "valid" : "invalid", s->mode, - s->sha1_valid ? sha1_to_hex(s->sha1) : ""); + s->oid_valid ? oid_to_hex(&s->oid) : ""); fprintf(stderr, "queue[%d] %s size %lu\n", x, one ? one : "", s->size); @@@ -4305,11 -4288,11 +4306,11 @@@ static void diff_resolve_rename_copy(vo else p->status = DIFF_STATUS_RENAMED; } - else if (hashcmp(p->one->sha1, p->two->sha1) || + else if (oidcmp(&p->one->oid, &p->two->oid) || p->one->mode != p->two->mode || p->one->dirty_submodule || p->two->dirty_submodule || - is_null_sha1(p->one->sha1)) + is_null_oid(&p->one->oid)) p->status = DIFF_STATUS_MODIFIED; else { /* This is a "no-change" entry and should not @@@ -4451,7 -4434,7 +4452,7 @@@ static void patch_id_consume(void *priv } /* returns 0 upon success, and writes result into sha1 */ -static int diff_get_patch_id(struct diff_options *options, unsigned char *sha1) +static int diff_get_patch_id(struct diff_options *options, unsigned char *sha1, int diff_header_only) { struct diff_queue_struct *q = &diff_queued_diff; int i; @@@ -4486,6 -4469,9 +4487,6 @@@ diff_fill_sha1_info(p->one); diff_fill_sha1_info(p->two); - if (fill_mmfile(&mf1, p->one) < 0 || - fill_mmfile(&mf2, p->two) < 0) - return error("unable to read files to diff"); len1 = remove_space(p->one->path, strlen(p->one->path)); len2 = remove_space(p->two->path, strlen(p->two->path)); @@@ -4520,19 -4506,10 +4521,19 @@@ len2, p->two->path); git_SHA1_Update(&ctx, buffer, len1); + if (diff_header_only) + continue; + + if (fill_mmfile(&mf1, p->one) < 0 || + fill_mmfile(&mf2, p->two) < 0) + return error("unable to read files to diff"); + if (diff_filespec_is_binary(p->one) || diff_filespec_is_binary(p->two)) { - git_SHA1_Update(&ctx, sha1_to_hex(p->one->sha1), 40); - git_SHA1_Update(&ctx, sha1_to_hex(p->two->sha1), 40); + git_SHA1_Update(&ctx, oid_to_hex(&p->one->oid), + 40); + git_SHA1_Update(&ctx, oid_to_hex(&p->two->oid), + 40); continue; } @@@ -4549,11 -4526,11 +4550,11 @@@ return 0; } -int diff_flush_patch_id(struct diff_options *options, unsigned char *sha1) +int diff_flush_patch_id(struct diff_options *options, unsigned char *sha1, int diff_header_only) { struct diff_queue_struct *q = &diff_queued_diff; int i; - int result = diff_get_patch_id(options, sha1); + int result = diff_get_patch_id(options, sha1, diff_header_only); for (i = 0; i < q->nr; i++) diff_free_filepair(q->queue[i]); @@@ -4824,7 -4801,7 +4825,7 @@@ static int diff_filespec_check_stat_unm */ if (!DIFF_FILE_VALID(p->one) || /* (1) */ !DIFF_FILE_VALID(p->two) || - (p->one->sha1_valid && p->two->sha1_valid) || + (p->one->oid_valid && p->two->oid_valid) || (p->one->mode != p->two->mode) || diff_populate_filespec(p->one, CHECK_SIZE_ONLY) || diff_populate_filespec(p->two, CHECK_SIZE_ONLY) || @@@ -5120,9 -5097,8 +5121,9 @@@ size_t fill_textconv(struct userdiff_dr if (!driver->textconv) die("BUG: fill_textconv called with non-textconv driver"); - if (driver->textconv_cache && df->sha1_valid) { - *outbuf = notes_cache_get(driver->textconv_cache, df->sha1, + if (driver->textconv_cache && df->oid_valid) { + *outbuf = notes_cache_get(driver->textconv_cache, + df->oid.hash, &size); if (*outbuf) return size; @@@ -5132,9 -5108,9 +5133,9 @@@ if (!*outbuf) die("unable to read files to diff"); - if (driver->textconv_cache && df->sha1_valid) { + if (driver->textconv_cache && df->oid_valid) { /* ignore errors, as we might be in a readonly repository */ - notes_cache_put(driver->textconv_cache, df->sha1, *outbuf, + notes_cache_put(driver->textconv_cache, df->oid.hash, *outbuf, size); /* * we could save up changes and flush them all at the end, diff --combined diffcore-pickaxe.c index 55067cab6c,8413d76582..9795ca1c15 --- a/diffcore-pickaxe.c +++ b/diffcore-pickaxe.c @@@ -7,8 -7,6 +7,8 @@@ #include "diffcore.h" #include "xdiff-interface.h" #include "kwset.h" +#include "commit.h" +#include "quote.h" typedef int (*pickaxe_fn)(mmfile_t *one, mmfile_t *two, struct diff_options *o, @@@ -23,7 -21,6 +23,6 @@@ static void diffgrep_consume(void *priv { struct diffgrep_cb *data = priv; regmatch_t regmatch; - int hold; if (line[0] != '+' && line[0] != '-') return; @@@ -33,11 -30,8 +32,8 @@@ * caller early. */ return; - /* Yuck -- line ought to be "const char *"! */ - hold = line[len]; - line[len] = '\0'; - data->hit = !regexec(data->regexp, line + 1, 1, ®match, 0); - line[len] = hold; + data->hit = !regexec_buf(data->regexp, line + 1, len - 1, 1, + ®match, 0); } static int diff_grep(mmfile_t *one, mmfile_t *two, @@@ -50,9 -44,11 +46,11 @@@ xdemitconf_t xecfg; if (!one) - return !regexec(regexp, two->ptr, 1, ®match, 0); + return !regexec_buf(regexp, two->ptr, two->size, + 1, ®match, 0); if (!two) - return !regexec(regexp, one->ptr, 1, ®match, 0); + return !regexec_buf(regexp, one->ptr, one->size, + 1, ®match, 0); /* * We have both sides; need to run textual diff and see if @@@ -83,8 -79,8 +81,8 @@@ static unsigned int contains(mmfile_t * regmatch_t regmatch; int flags = 0; - assert(data[sz] == '\0'); - while (*data && !regexec(regexp, data, 1, ®match, flags)) { + while (*data && + !regexec_buf(regexp, data, sz, 1, ®match, flags)) { flags |= REG_NOTBOL; data += regmatch.rm_eo; if (*data && regmatch.rm_so == regmatch.rm_eo) @@@ -200,18 -196,6 +198,18 @@@ static void pickaxe(struct diff_queue_s *q = outq; } +static void regcomp_or_die(regex_t *regex, const char *needle, int cflags) +{ + int err = regcomp(regex, needle, cflags); + if (err) { + /* The POSIX.2 people are surely sick */ + char errbuf[1024]; + regerror(err, regex, errbuf, 1024); + regfree(regex); + die("invalid regex: %s", errbuf); + } +} + void diffcore_pickaxe(struct diff_options *o) { const char *needle = o->pickaxe; @@@ -220,19 -204,18 +218,19 @@@ kwset_t kws = NULL; if (opts & (DIFF_PICKAXE_REGEX | DIFF_PICKAXE_KIND_G)) { - int err; int cflags = REG_EXTENDED | REG_NEWLINE; if (DIFF_OPT_TST(o, PICKAXE_IGNORE_CASE)) cflags |= REG_ICASE; - err = regcomp(®ex, needle, cflags); - if (err) { - /* The POSIX.2 people are surely sick */ - char errbuf[1024]; - regerror(err, ®ex, errbuf, 1024); - regfree(®ex); - die("invalid regex: %s", errbuf); - } + regcomp_or_die(®ex, needle, cflags); + regexp = ®ex; + } else if (DIFF_OPT_TST(o, PICKAXE_IGNORE_CASE) && + has_non_ascii(needle)) { + struct strbuf sb = STRBUF_INIT; + int cflags = REG_NEWLINE | REG_ICASE; + + basic_regex_quote_buf(&sb, needle); + regcomp_or_die(®ex, sb.buf, cflags); + strbuf_release(&sb); regexp = ®ex; } else { kws = kwsalloc(DIFF_OPT_TST(o, PICKAXE_IGNORE_CASE) diff --combined git-compat-util.h index 68615b1a6a,7047d281e5..2c949983dc --- a/git-compat-util.h +++ b/git-compat-util.h @@@ -409,9 -409,7 +409,9 @@@ extern NORETURN void usagef(const char extern NORETURN void die(const char *err, ...) __attribute__((format (printf, 1, 2))); extern NORETURN void die_errno(const char *err, ...) __attribute__((format (printf, 1, 2))); extern int error(const char *err, ...) __attribute__((format (printf, 1, 2))); +extern int error_errno(const char *err, ...) __attribute__((format (printf, 1, 2))); extern void warning(const char *err, ...) __attribute__((format (printf, 1, 2))); +extern void warning_errno(const char *err, ...) __attribute__((format (printf, 1, 2))); #ifndef NO_OPENSSL #ifdef APPLE_COMMON_CRYPTO @@@ -436,7 -434,6 +436,7 @@@ static inline int const_error(void return -1; } #define error(...) (error(__VA_ARGS__), const_error()) +#define error_errno(...) (error_errno(__VA_ARGS__), const_error()) #endif extern void set_die_routine(NORETURN_PTR void (*routine)(const char *err, va_list params)); @@@ -474,23 -471,6 +474,23 @@@ static inline int skip_prefix(const cha return 0; } +/* + * Like skip_prefix, but promises never to read past "len" bytes of the input + * buffer, and returns the remaining number of bytes in "out" via "outlen". + */ +static inline int skip_prefix_mem(const char *buf, size_t len, + const char *prefix, + const char **out, size_t *outlen) +{ + size_t prefix_len = strlen(prefix); + if (prefix_len <= len && !memcmp(buf, prefix, prefix_len)) { + *out = buf + prefix_len; + *outlen = len - prefix_len; + return 1; + } + return 0; +} + /* * If buf ends with suffix, return 1 and subtract the length of the suffix * from *len. Otherwise, return 0 and leave *len untouched. @@@ -664,22 -644,10 +664,22 @@@ void *gitmemmem(const void *haystack, s const void *needle, size_t needlelen); #endif +#ifdef OVERRIDE_STRDUP +#ifdef strdup +#undef strdup +#endif +#define strdup gitstrdup +char *gitstrdup(const char *s); +#endif + #ifdef NO_GETPAGESIZE #define getpagesize() sysconf(_SC_PAGESIZE) #endif +#ifndef O_CLOEXEC +#define O_CLOEXEC 0 +#endif + #ifdef FREAD_READS_DIRECTORIES #ifdef fopen #undef fopen @@@ -828,7 -796,7 +828,7 @@@ extern FILE *fopen_for_writing(const ch * you can do: * * struct foo *f; - * FLEX_ALLOC_STR(f, name, src); + * FLEXPTR_ALLOC_STR(f, name, src); * * and "name" will point to a block of memory after the struct, which will be * freed along with the struct (but the pointer can be repointed anywhere). @@@ -974,6 -942,19 +974,19 @@@ void git_qsort(void *base, size_t nmemb #define qsort git_qsort #endif + #ifndef REG_STARTEND + #error "Git requires REG_STARTEND support. Compile with NO_REGEX=NeedsStartEnd" + #endif + + static inline int regexec_buf(const regex_t *preg, const char *buf, size_t size, + size_t nmatch, regmatch_t pmatch[], int eflags) + { + assert(nmatch > 0 && pmatch); + pmatch[0].rm_so = 0; + pmatch[0].rm_eo = size; + return regexec(preg, buf, nmatch, pmatch, eflags | REG_STARTEND); + } + #ifndef DIR_HAS_BSD_GROUP_SEMANTICS # define FORCE_DIR_SET_GID S_ISGID #else @@@ -1075,5 -1056,3 +1088,5 @@@ struct tm *git_gmtime_r(const time_t * #endif #endif + +extern int cmd_main(int, const char **); diff --combined grep.c index d7d00b87cb,8ed56236f0..1194d35b5d --- a/grep.c +++ b/grep.c @@@ -4,8 -4,6 +4,8 @@@ #include "xdiff-interface.h" #include "diff.h" #include "diffcore.h" +#include "commit.h" +#include "quote.h" static int grep_source_load(struct grep_source *gs); static int grep_source_is_binary(struct grep_source *gs); @@@ -163,7 -161,17 +163,7 @@@ void grep_init(struct grep_opt *opt, co color_set(opt->color_sep, def->color_sep); } -void grep_commit_pattern_type(enum grep_pattern_type pattern_type, struct grep_opt *opt) -{ - if (pattern_type != GREP_PATTERN_TYPE_UNSPECIFIED) - grep_set_pattern_type_option(pattern_type, opt); - else if (opt->pattern_type_option != GREP_PATTERN_TYPE_UNSPECIFIED) - grep_set_pattern_type_option(opt->pattern_type_option, opt); - else if (opt->extended_regexp_option) - grep_set_pattern_type_option(GREP_PATTERN_TYPE_ERE, opt); -} - -void grep_set_pattern_type_option(enum grep_pattern_type pattern_type, struct grep_opt *opt) +static void grep_set_pattern_type_option(enum grep_pattern_type pattern_type, struct grep_opt *opt) { switch (pattern_type) { case GREP_PATTERN_TYPE_UNSPECIFIED: @@@ -195,16 -203,6 +195,16 @@@ } } +void grep_commit_pattern_type(enum grep_pattern_type pattern_type, struct grep_opt *opt) +{ + if (pattern_type != GREP_PATTERN_TYPE_UNSPECIFIED) + grep_set_pattern_type_option(pattern_type, opt); + else if (opt->pattern_type_option != GREP_PATTERN_TYPE_UNSPECIFIED) + grep_set_pattern_type_option(opt->pattern_type_option, opt); + else if (opt->extended_regexp_option) + grep_set_pattern_type_option(GREP_PATTERN_TYPE_ERE, opt); +} + static struct grep_pat *create_grep_pat(const char *pat, size_t patlen, const char *origin, int no, enum grep_pat_token t, @@@ -324,16 -322,11 +324,16 @@@ static void compile_pcre_regexp(struct int erroffset; int options = PCRE_MULTILINE; - if (opt->ignore_case) + if (opt->ignore_case) { + if (has_non_ascii(p->pattern)) + p->pcre_tables = pcre_maketables(); options |= PCRE_CASELESS; + } + if (is_utf8_locale() && has_non_ascii(p->pattern)) + options |= PCRE_UTF8; p->pcre_regexp = pcre_compile(p->pattern, options, &error, &erroffset, - NULL); + p->pcre_tables); if (!p->pcre_regexp) compile_regexp_failed(p, error); @@@ -367,7 -360,6 +367,7 @@@ static void free_pcre_regexp(struct gre { pcre_free(p->pcre_regexp); pcre_free(p->pcre_extra_info); + pcre_free((void *)p->pcre_tables); } #else /* !USE_LIBPCRE */ static void compile_pcre_regexp(struct grep_pat *p, const struct grep_opt *opt) @@@ -404,68 -396,26 +404,68 @@@ static int is_fixed(const char *s, size return 1; } +static void compile_fixed_regexp(struct grep_pat *p, struct grep_opt *opt) +{ + struct strbuf sb = STRBUF_INIT; + int err; + int regflags; + + basic_regex_quote_buf(&sb, p->pattern); + regflags = opt->regflags & ~REG_EXTENDED; + if (opt->ignore_case) + regflags |= REG_ICASE; + err = regcomp(&p->regexp, sb.buf, regflags); + if (opt->debug) + fprintf(stderr, "fixed %s\n", sb.buf); + strbuf_release(&sb); + if (err) { + char errbuf[1024]; + regerror(err, &p->regexp, errbuf, sizeof(errbuf)); + regfree(&p->regexp); + compile_regexp_failed(p, errbuf); + } +} + static void compile_regexp(struct grep_pat *p, struct grep_opt *opt) { + int icase, ascii_only; int err; p->word_regexp = opt->word_regexp; p->ignore_case = opt->ignore_case; + icase = opt->regflags & REG_ICASE || p->ignore_case; + ascii_only = !has_non_ascii(p->pattern); + /* + * Even when -F (fixed) asks us to do a non-regexp search, we + * may not be able to correctly case-fold when -i + * (ignore-case) is asked (in which case, we'll synthesize a + * regexp to match the pattern that matches regexp special + * characters literally, while ignoring case differences). On + * the other hand, even without -F, if the pattern does not + * have any regexp special characters and there is no need for + * case-folding search, we can internally turn it into a + * simple string match using kws. p->fixed tells us if we + * want to use kws. + */ if (opt->fixed || is_fixed(p->pattern, p->patternlen)) - p->fixed = 1; + p->fixed = !icase || ascii_only; else p->fixed = 0; if (p->fixed) { - if (opt->regflags & REG_ICASE || p->ignore_case) - p->kws = kwsalloc(tolower_trans_tbl); - else - p->kws = kwsalloc(NULL); + p->kws = kwsalloc(icase ? tolower_trans_tbl : NULL); kwsincr(p->kws, p->pattern, p->patternlen); kwsprep(p->kws); return; + } else if (opt->fixed) { + /* + * We come here when the pattern has the non-ascii + * characters we cannot case-fold, and asked to + * ignore-case. + */ + compile_fixed_regexp(p, opt); + return; } if (opt->pcre) { @@@ -693,10 -643,10 +693,10 @@@ static struct grep_expr *prep_header_pa for (p = opt->header_list; p; p = p->next) { if (p->token != GREP_PATTERN_HEAD) - die("bug: a non-header pattern in grep header list."); + die("BUG: a non-header pattern in grep header list."); if (p->field < GREP_HEADER_FIELD_MIN || GREP_HEADER_FIELD_MAX <= p->field) - die("bug: unknown header field %d", p->field); + die("BUG: unknown header field %d", p->field); compile_regexp(p, opt); } @@@ -709,7 -659,7 +709,7 @@@ h = compile_pattern_atom(&pp); if (!h || pp != p->next) - die("bug: malformed header expr"); + die("BUG: malformed header expr"); if (!header_group[p->field]) { header_group[p->field] = h; continue; @@@ -898,17 -848,6 +898,6 @@@ static int fixmatch(struct grep_pat *p } } - static int regmatch(const regex_t *preg, char *line, char *eol, - regmatch_t *match, int eflags) - { - #ifdef REG_STARTEND - match->rm_so = 0; - match->rm_eo = eol - line; - eflags |= REG_STARTEND; - #endif - return regexec(preg, line, 1, match, eflags); - } - static int patmatch(struct grep_pat *p, char *line, char *eol, regmatch_t *match, int eflags) { @@@ -919,7 -858,8 +908,8 @@@ else if (p->pcre_regexp) hit = !pcrematch(p, line, eol, match, eflags); else - hit = !regmatch(&p->regexp, line, eol, match, eflags); + hit = !regexec_buf(&p->regexp, line, eol - line, 1, match, + eflags); return hit; } @@@ -1446,17 -1386,9 +1436,17 @@@ static int fill_textconv_grep(struct us return 0; } +static int is_empty_line(const char *bol, const char *eol) +{ + while (bol < eol && isspace(*bol)) + bol++; + return bol == eol; +} + static int grep_source_1(struct grep_opt *opt, struct grep_source *gs, int collect_hits) { char *bol; + char *peek_bol = NULL; unsigned long left; unsigned lno = 1; unsigned last_hit = 0; @@@ -1514,7 -1446,7 +1504,7 @@@ case GREP_BINARY_TEXT: break; default: - die("bug: unknown binary handling mode"); + die("BUG: unknown binary handling mode"); } } @@@ -1601,24 -1533,8 +1591,24 @@@ show_function = 1; goto next_line; } - if (show_function && match_funcname(opt, gs, bol, eol)) - show_function = 0; + if (show_function && (!peek_bol || peek_bol < bol)) { + unsigned long peek_left = left; + char *peek_eol = eol; + + /* + * Trailing empty lines are not interesting. + * Peek past them to see if they belong to the + * body of the current function. + */ + peek_bol = bol; + while (is_empty_line(peek_bol, peek_eol)) { + peek_bol = peek_eol + 1; + peek_eol = end_of_line(peek_bol, &peek_left); + } + + if (match_funcname(opt, gs, peek_bol, peek_eol)) + show_function = 0; + } if (show_function || (last_hit && lno <= last_hit + opt->post_context)) { /* If the last hit is within the post context, @@@ -1806,7 -1722,7 +1796,7 @@@ static int grep_source_load_file(struc if (lstat(filename, &st) < 0) { err_ret: if (errno != ENOENT) - error(_("'%s': %s"), filename, strerror(errno)); + error_errno(_("failed to stat '%s'"), filename); return -1; } if (!S_ISREG(st.st_mode)) @@@ -1817,7 -1733,7 +1807,7 @@@ goto err_ret; data = xmallocz(size); if (st.st_size != read_in_full(i, data, size)) { - error(_("'%s': short read %s"), filename, strerror(errno)); + error_errno(_("'%s': short read"), filename); close(i); free(data); return -1; diff --combined xdiff-interface.c index f34ea762e4,08a7313e6a..50702a215d --- a/xdiff-interface.c +++ b/xdiff-interface.c @@@ -100,9 -100,9 +100,9 @@@ static int xdiff_outf(void *priv_, mmbu /* * Trim down common substring at the end of the buffers, - * but leave at least ctx lines at the end. + * but end on a complete line. */ -static void trim_common_tail(mmfile_t *a, mmfile_t *b, long ctx) +static void trim_common_tail(mmfile_t *a, mmfile_t *b) { const int blk = 1024; long trimmed = 0, recovered = 0; @@@ -110,6 -110,9 +110,6 @@@ char *bp = b->ptr + b->size; long smaller = (a->size < b->size) ? a->size : b->size; - if (ctx) - return; - while (blk + trimmed <= smaller && !memcmp(ap - blk, bp - blk, blk)) { trimmed += blk; ap -= blk; @@@ -131,8 -134,7 +131,8 @@@ int xdi_diff(mmfile_t *mf1, mmfile_t *m if (mf1->size > MAX_XDIFF_SIZE || mf2->size > MAX_XDIFF_SIZE) return -1; - trim_common_tail(&a, &b, xecfg->ctxlen); + if (!xecfg->ctxlen && !(xecfg->flags & XDL_EMIT_FUNCCONTEXT)) + trim_common_tail(&a, &b); return xdl_diff(&a, &b, xpp, xecfg, xecb); } @@@ -214,11 -216,10 +214,10 @@@ struct ff_regs static long ff_regexp(const char *line, long len, char *buffer, long buffer_size, void *priv) { - char *line_buffer; struct ff_regs *regs = priv; regmatch_t pmatch[2]; int i; - int result = -1; + int result; /* Exclude terminating newline (and cr) from matching */ if (len > 0 && line[len-1] == '\n') { @@@ -228,18 -229,16 +227,16 @@@ len--; } - line_buffer = xstrndup(line, len); /* make NUL terminated */ - for (i = 0; i < regs->nr; i++) { struct ff_reg *reg = regs->array + i; - if (!regexec(®->re, line_buffer, 2, pmatch, 0)) { + if (!regexec_buf(®->re, line, len, 2, pmatch, 0)) { if (reg->negate) - goto fail; + return -1; break; } } if (regs->nr <= i) - goto fail; + return -1; i = pmatch[1].rm_so >= 0 ? 1 : 0; line += pmatch[i].rm_so; result = pmatch[i].rm_eo - pmatch[i].rm_so; @@@ -248,8 -247,6 +245,6 @@@ while (result > 0 && (isspace(line[result - 1]))) result--; memcpy(buffer, line, result); - fail: - free(line_buffer); return result; }