From: Junio C Hamano <gitster@pobox.com>
Date: Sat, 22 Aug 2009 01:47:48 +0000 (-0700)
Subject: Merge branch 'gb/apply-ignore-whitespace'
X-Git-Tag: v1.6.5-rc0~53
X-Git-Url: https://git.lorimer.id.au/gitweb.git/diff_plain/5e092b5bcea48c098af7000f888a2a0f16c9db77?hp=-c

Merge branch 'gb/apply-ignore-whitespace'

* gb/apply-ignore-whitespace:
git apply: option to ignore whitespace differences
---

5e092b5bcea48c098af7000f888a2a0f16c9db77
diff --combined Documentation/config.txt
index 2632c5149e,0b53cab6af..5256c7fb81
--- a/Documentation/config.txt
+++ b/Documentation/config.txt
@@@ -461,6 -461,14 +461,14 @@@ it will be treated as a shell command
  executed from the top-level directory of a repository, which may
  not necessarily be the current directory.
  
+ apply.ignorewhitespace::
+ 	When set to 'change', tells 'git-apply' to ignore changes in
+ 	whitespace, in the same way as the '--ignore-space-change'
+ 	option.
+ 	When set to one of 'no', 'none', 'never' or 'false', tells 'git-apply' to
+ 	respect all whitespace differences.
+ 	See linkgit:git-apply[1].
+ 
  apply.whitespace::
  	Tells 'git-apply' how to handle whitespaces, in the same way
  	as the '--whitespace' option. See linkgit:git-apply[1].
@@@ -605,7 -613,7 +613,7 @@@ color.interactive.<slot>:
  	Use customized color for 'git-add --interactive'
  	output. `<slot>` may be `prompt`, `header`, `help` or `error`, for
  	four distinct types of normal output from interactive
 -	programs.  The values of these variables may be specified as
 +	commands.  The values of these variables may be specified as
  	in color.branch.<slot>.
  
  color.pager::
@@@ -1113,7 -1121,7 +1121,7 @@@ instaweb.port:
  	linkgit:git-instaweb[1].
  
  interactive.singlekey::
 -	In interactive programs, allow the user to provide one-letter
 +	In interactive commands, allow the user to provide one-letter
  	input with a single key (i.e., without hitting enter).
  	Currently this is used only by the `\--patch` mode of
  	linkgit:git-add[1].  Note that this setting is silently
@@@ -1218,20 -1226,12 +1226,20 @@@ pack.compression:
  
  pack.deltaCacheSize::
  	The maximum memory in bytes used for caching deltas in
 -	linkgit:git-pack-objects[1].
 -	A value of 0 means no limit. Defaults to 0.
 +	linkgit:git-pack-objects[1] before writing them out to a pack.
 +	This cache is used to speed up the writing object phase by not
 +	having to recompute the final delta result once the best match
 +	for all objects is found.  Repacking large repositories on machines
 +	which are tight with memory might be badly impacted by this though,
 +	especially if this cache pushes the system into swapping.
 +	A value of 0 means no limit. The smallest size of 1 byte may be
 +	used to virtually disable this cache. Defaults to 256 MiB.
  
  pack.deltaCacheLimit::
  	The maximum size of a delta, that is cached in
 -	linkgit:git-pack-objects[1]. Defaults to 1000.
 +	linkgit:git-pack-objects[1]. This cache is used to speed up the
 +	writing object phase by not having to recompute the final delta
 +	result once the best match for all objects is found. Defaults to 1000.
  
  pack.threads::
  	Specifies the number of threads to spawn when searching for best
diff --combined builtin-apply.c
index 39dc96ae02,d9303578b6..ae11b41ef2
--- a/builtin-apply.c
+++ b/builtin-apply.c
@@@ -61,6 -61,13 +61,13 @@@ static enum ws_error_action 
  static int whitespace_error;
  static int squelch_whitespace_errors = 5;
  static int applied_after_fixing_ws;
+ 
+ static enum ws_ignore {
+ 	ignore_ws_none,
+ 	ignore_ws_change,
+ } ws_ignore_action = ignore_ws_none;
+ 
+ 
  static const char *patch_input_file;
  static const char *root;
  static int root_len;
@@@ -97,6 -104,21 +104,21 @@@ static void parse_whitespace_option(con
  	die("unrecognized whitespace option '%s'", option);
  }
  
+ static void parse_ignorewhitespace_option(const char *option)
+ {
+ 	if (!option || !strcmp(option, "no") ||
+ 	    !strcmp(option, "false") || !strcmp(option, "never") ||
+ 	    !strcmp(option, "none")) {
+ 		ws_ignore_action = ignore_ws_none;
+ 		return;
+ 	}
+ 	if (!strcmp(option, "change")) {
+ 		ws_ignore_action = ignore_ws_change;
+ 		return;
+ 	}
+ 	die("unrecognized whitespace ignore option '%s'", option);
+ }
+ 
  static void set_default_whitespace_mode(const char *whitespace_option)
  {
  	if (!whitespace_option && !apply_default_whitespace)
@@@ -214,6 -236,62 +236,62 @@@ static uint32_t hash_line(const char *c
  	return h;
  }
  
+ /*
+  * Compare lines s1 of length n1 and s2 of length n2, ignoring
+  * whitespace difference. Returns 1 if they match, 0 otherwise
+  */
+ static int fuzzy_matchlines(const char *s1, size_t n1,
+ 			    const char *s2, size_t n2)
+ {
+ 	const char *last1 = s1 + n1 - 1;
+ 	const char *last2 = s2 + n2 - 1;
+ 	int result = 0;
+ 
+ 	if (n1 < 0 || n2 < 0)
+ 		return 0;
+ 
+ 	/* ignore line endings */
+ 	while ((*last1 == '\r') || (*last1 == '\n'))
+ 		last1--;
+ 	while ((*last2 == '\r') || (*last2 == '\n'))
+ 		last2--;
+ 
+ 	/* skip leading whitespace */
+ 	while (isspace(*s1) && (s1 <= last1))
+ 		s1++;
+ 	while (isspace(*s2) && (s2 <= last2))
+ 		s2++;
+ 	/* early return if both lines are empty */
+ 	if ((s1 > last1) && (s2 > last2))
+ 		return 1;
+ 	while (!result) {
+ 		result = *s1++ - *s2++;
+ 		/*
+ 		 * Skip whitespace inside. We check for whitespace on
+ 		 * both buffers because we don't want "a b" to match
+ 		 * "ab"
+ 		 */
+ 		if (isspace(*s1) && isspace(*s2)) {
+ 			while (isspace(*s1) && s1 <= last1)
+ 				s1++;
+ 			while (isspace(*s2) && s2 <= last2)
+ 				s2++;
+ 		}
+ 		/*
+ 		 * If we reached the end on one side only,
+ 		 * lines don't match
+ 		 */
+ 		if (
+ 		    ((s2 > last2) && (s1 <= last1)) ||
+ 		    ((s1 > last1) && (s2 <= last2)))
+ 			return 0;
+ 		if ((s1 > last1) && (s2 > last2))
+ 			break;
+ 	}
+ 
+ 	return !result;
+ }
+ 
  static void add_line_info(struct image *img, const char *bol, size_t len, unsigned flag)
  {
  	ALLOC_GROW(img->line_allocated, img->nr + 1, img->alloc);
@@@ -457,76 -535,6 +535,76 @@@ static int guess_p_value(const char *na
  	return val;
  }
  
 +/*
 + * Does the ---/+++ line have the POSIX timestamp after the last HT?
 + * GNU diff puts epoch there to signal a creation/deletion event.  Is
 + * this such a timestamp?
 + */
 +static int has_epoch_timestamp(const char *nameline)
 +{
 +	/*
 +	 * We are only interested in epoch timestamp; any non-zero
 +	 * fraction cannot be one, hence "(\.0+)?" in the regexp below.
 +	 * For the same reason, the date must be either 1969-12-31 or
 +	 * 1970-01-01, and the seconds part must be "00".
 +	 */
 +	const char stamp_regexp[] =
 +		"^(1969-12-31|1970-01-01)"
 +		" "
 +		"[0-2][0-9]:[0-5][0-9]:00(\\.0+)?"
 +		" "
 +		"([-+][0-2][0-9][0-5][0-9])\n";
 +	const char *timestamp = NULL, *cp;
 +	static regex_t *stamp;
 +	regmatch_t m[10];
 +	int zoneoffset;
 +	int hourminute;
 +	int status;
 +
 +	for (cp = nameline; *cp != '\n'; cp++) {
 +		if (*cp == '\t')
 +			timestamp = cp + 1;
 +	}
 +	if (!timestamp)
 +		return 0;
 +	if (!stamp) {
 +		stamp = xmalloc(sizeof(*stamp));
 +		if (regcomp(stamp, stamp_regexp, REG_EXTENDED)) {
 +			warning("Cannot prepare timestamp regexp %s",
 +				stamp_regexp);
 +			return 0;
 +		}
 +	}
 +
 +	status = regexec(stamp, timestamp, ARRAY_SIZE(m), m, 0);
 +	if (status) {
 +		if (status != REG_NOMATCH)
 +			warning("regexec returned %d for input: %s",
 +				status, timestamp);
 +		return 0;
 +	}
 +
 +	zoneoffset = strtol(timestamp + m[3].rm_so + 1, NULL, 10);
 +	zoneoffset = (zoneoffset / 100) * 60 + (zoneoffset % 100);
 +	if (timestamp[m[3].rm_so] == '-')
 +		zoneoffset = -zoneoffset;
 +
 +	/*
 +	 * YYYY-MM-DD hh:mm:ss must be from either 1969-12-31
 +	 * (west of GMT) or 1970-01-01 (east of GMT)
 +	 */
 +	if ((zoneoffset < 0 && memcmp(timestamp, "1969-12-31", 10)) ||
 +	    (0 <= zoneoffset && memcmp(timestamp, "1970-01-01", 10)))
 +		return 0;
 +
 +	hourminute = (strtol(timestamp + 11, NULL, 10) * 60 +
 +		      strtol(timestamp + 14, NULL, 10) -
 +		      zoneoffset);
 +
 +	return ((zoneoffset < 0 && hourminute == 1440) ||
 +		(0 <= zoneoffset && !hourminute));
 +}
 +
  /*
   * Get the name etc info from the ---/+++ lines of a traditional patch header
   *
@@@ -563,17 -571,7 +641,17 @@@ static void parse_traditional_patch(con
  	} else {
  		name = find_name(first, NULL, p_value, TERM_SPACE | TERM_TAB);
  		name = find_name(second, name, p_value, TERM_SPACE | TERM_TAB);
 -		patch->old_name = patch->new_name = name;
 +		if (has_epoch_timestamp(first)) {
 +			patch->is_new = 1;
 +			patch->is_delete = 0;
 +			patch->new_name = name;
 +		} else if (has_epoch_timestamp(second)) {
 +			patch->is_new = 0;
 +			patch->is_delete = 1;
 +			patch->old_name = name;
 +		} else {
 +			patch->old_name = patch->new_name = name;
 +		}
  	}
  	if (!name)
  		die("unable to find filename in patch at line %d", linenr);
@@@ -1672,10 -1670,17 +1750,17 @@@ static int read_old_data(struct stat *s
  	}
  }
  
+ /*
+  * Update the preimage, and the common lines in postimage,
+  * from buffer buf of length len. If postlen is 0 the postimage
+  * is updated in place, otherwise it's updated on a new buffer
+  * of length postlen
+  */
+ 
  static void update_pre_post_images(struct image *preimage,
  				   struct image *postimage,
  				   char *buf,
- 				   size_t len)
+ 				   size_t len, size_t postlen)
  {
  	int i, ctx;
  	char *new, *old, *fixed;
@@@ -1694,11 -1699,19 +1779,19 @@@
  	*preimage = fixed_preimage;
  
  	/*
- 	 * Adjust the common context lines in postimage, in place.
- 	 * This is possible because whitespace fixing does not make
- 	 * the string grow.
+ 	 * Adjust the common context lines in postimage. This can be
+ 	 * done in-place when we are just doing whitespace fixing,
+ 	 * which does not make the string grow, but needs a new buffer
+ 	 * when ignoring whitespace causes the update, since in this case
+ 	 * we could have e.g. tabs converted to multiple spaces.
+ 	 * We trust the caller to tell us if the update can be done
+ 	 * in place (postlen==0) or not.
  	 */
- 	new = old = postimage->buf;
+ 	old = postimage->buf;
+ 	if (postlen)
+ 		new = postimage->buf = xmalloc(postlen);
+ 	else
+ 		new = old;
  	fixed = preimage->buf;
  	for (i = ctx = 0; i < postimage->nr; i++) {
  		size_t len = postimage->line[i].len;
@@@ -1773,12 -1786,58 +1866,58 @@@ static int match_fragment(struct image 
  	    !memcmp(img->buf + try, preimage->buf, preimage->len))
  		return 1;
  
+ 	/*
+ 	 * No exact match. If we are ignoring whitespace, run a line-by-line
+ 	 * fuzzy matching. We collect all the line length information because
+ 	 * we need it to adjust whitespace if we match.
+ 	 */
+ 	if (ws_ignore_action == ignore_ws_change) {
+ 		size_t imgoff = 0;
+ 		size_t preoff = 0;
+ 		size_t postlen = postimage->len;
+ 		size_t imglen[preimage->nr];
+ 		for (i = 0; i < preimage->nr; i++) {
+ 			size_t prelen = preimage->line[i].len;
+ 
+ 			imglen[i] = img->line[try_lno+i].len;
+ 			if (!fuzzy_matchlines(
+ 				img->buf + try + imgoff, imglen[i],
+ 				preimage->buf + preoff, prelen))
+ 				return 0;
+ 			if (preimage->line[i].flag & LINE_COMMON)
+ 				postlen += imglen[i] - prelen;
+ 			imgoff += imglen[i];
+ 			preoff += prelen;
+ 		}
+ 
+ 		/*
+ 		 * Ok, the preimage matches with whitespace fuzz. Update it and
+ 		 * the common postimage lines to use the same whitespace as the
+ 		 * target. imgoff now holds the true length of the target that
+ 		 * matches the preimage, and we need to update the line lengths
+ 		 * of the preimage to match the target ones.
+ 		 */
+ 		fixed_buf = xmalloc(imgoff);
+ 		memcpy(fixed_buf, img->buf + try, imgoff);
+ 		for (i = 0; i < preimage->nr; i++)
+ 			preimage->line[i].len = imglen[i];
+ 
+ 		/*
+ 		 * Update the preimage buffer and the postimage context lines.
+ 		 */
+ 		update_pre_post_images(preimage, postimage,
+ 				fixed_buf, imgoff, postlen);
+ 		return 1;
+ 	}
+ 
  	if (ws_error_action != correct_ws_error)
  		return 0;
  
  	/*
  	 * The hunk does not apply byte-by-byte, but the hash says
- 	 * it might with whitespace fuzz.
+ 	 * it might with whitespace fuzz. We haven't been asked to
+ 	 * ignore whitespace, we were asked to correct whitespace
+ 	 * errors, so let's try matching after whitespace correction.
  	 */
  	fixed_buf = xmalloc(preimage->len + 1);
  	buf = fixed_buf;
@@@ -1830,7 -1889,7 +1969,7 @@@
  	 * hunk match.  Update the context lines in the postimage.
  	 */
  	update_pre_post_images(preimage, postimage,
- 			       fixed_buf, buf - fixed_buf);
+ 			       fixed_buf, buf - fixed_buf, 0);
  	return 1;
  
   unmatch_exit:
@@@ -3272,6 -3331,8 +3411,8 @@@ static int git_apply_config(const char 
  {
  	if (!strcmp(var, "apply.whitespace"))
  		return git_config_string(&apply_default_whitespace, var, value);
+ 	else if (!strcmp(var, "apply.ignorewhitespace"))
+ 		return git_config_string(&apply_default_ignorewhitespace, var, value);
  	return git_default_config(var, value, cb);
  }
  
@@@ -3308,6 -3369,16 +3449,16 @@@ static int option_parse_z(const struct 
  	return 0;
  }
  
+ static int option_parse_space_change(const struct option *opt,
+ 			  const char *arg, int unset)
+ {
+ 	if (unset)
+ 		ws_ignore_action = ignore_ws_none;
+ 	else
+ 		ws_ignore_action = ignore_ws_change;
+ 	return 0;
+ }
+ 
  static int option_parse_whitespace(const struct option *opt,
  				   const char *arg, int unset)
  {
@@@ -3384,6 -3455,12 +3535,12 @@@ int cmd_apply(int argc, const char **ar
  		{ OPTION_CALLBACK, 0, "whitespace", &whitespace_option, "action",
  			"detect new or modified lines that have whitespace errors",
  			0, option_parse_whitespace },
+ 		{ OPTION_CALLBACK, 0, "ignore-space-change", NULL, NULL,
+ 			"ignore changes in whitespace when finding context",
+ 			PARSE_OPT_NOARG, option_parse_space_change },
+ 		{ OPTION_CALLBACK, 0, "ignore-whitespace", NULL, NULL,
+ 			"ignore changes in whitespace when finding context",
+ 			PARSE_OPT_NOARG, option_parse_space_change },
  		OPT_BOOLEAN('R', "reverse", &apply_in_reverse,
  			"apply the patch in reverse"),
  		OPT_BOOLEAN(0, "unidiff-zero", &unidiff_zero,
@@@ -3408,6 -3485,8 +3565,8 @@@
  	git_config(git_apply_config, NULL);
  	if (apply_default_whitespace)
  		parse_whitespace_option(apply_default_whitespace);
+ 	if (apply_default_ignorewhitespace)
+ 		parse_ignorewhitespace_option(apply_default_ignorewhitespace);
  
  	argc = parse_options(argc, argv, prefix, builtin_apply_options,
  			apply_usage, 0);
diff --combined cache.h
index 9222774e6c,695212fca9..eee717c81b
--- a/cache.h
+++ b/cache.h
@@@ -468,9 -468,6 +468,9 @@@ extern int index_fd(unsigned char *sha1
  extern int index_path(unsigned char *sha1, const char *path, struct stat *st, int write_object);
  extern void fill_stat_cache_info(struct cache_entry *ce, struct stat *st);
  
 +/* "careful lstat()" */
 +extern int check_path(const char *path, int len, struct stat *st);
 +
  #define REFRESH_REALLY		0x0001	/* ignore_valid */
  #define REFRESH_UNMERGED	0x0002	/* allow unmerged */
  #define REFRESH_QUIET		0x0004	/* be quiet about it */
@@@ -512,6 -509,7 +512,7 @@@ extern int log_all_ref_updates
  extern int warn_ambiguous_refs;
  extern int shared_repository;
  extern const char *apply_default_whitespace;
+ extern const char *apply_default_ignorewhitespace;
  extern int zlib_compression_level;
  extern int core_compression_level;
  extern int core_compression_seen;
diff --combined contrib/completion/git-completion.bash
index 5543dc4d14,dd7ec5dd66..bf688e12e6
--- a/contrib/completion/git-completion.bash
+++ b/contrib/completion/git-completion.bash
@@@ -674,6 -674,7 +674,7 @@@ _git_am (
  	--*)
  		__gitcomp "
  			--3way --committer-date-is-author-date --ignore-date
+ 			--ignore-whitespace --ignore-space-change
  			--interactive --keep --no-utf8 --signoff --utf8
  			--whitespace=
  			"
@@@ -695,6 -696,7 +696,7 @@@ _git_apply (
  			--stat --numstat --summary --check --index
  			--cached --index-info --reverse --reject --unidiff-zero
  			--apply --no-add --exclude=
+ 			--ignore-whitespace --ignore-space-change
  			--whitespace= --inaccurate-eof --verbose
  			"
  		return
@@@ -1047,7 -1049,6 +1049,7 @@@ _git_grep (
  			--extended-regexp --basic-regexp --fixed-strings
  			--files-with-matches --name-only
  			--files-without-match
 +			--max-depth
  			--count
  			--and --or --not --all-match
  			"
@@@ -1537,6 -1538,7 +1539,7 @@@ _git_config (
  	__gitcomp "
  		add.ignore-errors
  		alias.
+ 		apply.ignorewhitespace
  		apply.whitespace
  		branch.autosetupmerge
  		branch.autosetuprebase
diff --combined git-am.sh
index 5396be2279,ab39b3c2fa..3c03f3e0df
--- a/git-am.sh
+++ b/git-am.sh
@@@ -16,6 -16,8 +16,8 @@@ s,signoff       add a Signed-off-by lin
  u,utf8          recode into utf8 (default)
  k,keep          pass -k flag to git-mailinfo
  whitespace=     pass it through git-apply
+ ignore-space-change pass it through git-apply
+ ignore-whitespace pass it through git-apply
  directory=      pass it through git-apply
  C=              pass it through git-apply
  p=              pass it through git-apply
@@@ -191,33 -193,13 +193,33 @@@ check_patch_format () 
  			esac
  			;;
  		esac
 +		if test -z "$patch_format" &&
 +			test -n "$l1" &&
 +			test -n "$l2" &&
 +			test -n "$l3"
 +		then
 +			# This begins with three non-empty lines.  Is this a
 +			# piece of e-mail a-la RFC2822?  Grab all the headers,
 +			# discarding the indented remainder of folded lines,
 +			# and see if they all appear to begin with header
 +			# field names...
 +			sed -n -e '/^$/q' -e '/^[ 	]/d' -e p "$1" |
 +			egrep -v '^[A-Za-z]+(-[A-Za-z]+)*:' >/dev/null ||
 +			patch_format=mbox
 +		fi
  	} < "$1" || clean_abort
  }
  
  split_patches () {
  	case "$patch_format" in
  	mbox)
 -		git mailsplit -d"$prec" -o"$dotest" -b -- "$@" > "$dotest/last" ||
 +		case "$rebasing" in
 +		'')
 +			keep_cr= ;;
 +		?*)
 +			keep_cr=--keep-cr ;;
 +		esac
 +		git mailsplit -d"$prec" -o"$dotest" -b $keep_cr -- "$@" > "$dotest/last" ||
  		clean_abort
  		;;
  	stgit-series)
@@@ -274,11 -256,7 +276,11 @@@
  		msgnum=
  		;;
  	*)
 -		clean_abort "Patch format $patch_format is not supported."
 +		if test -n "$parse_patch" ; then
 +			clean_abort "Patch format $patch_format is not supported."
 +		else
 +			clean_abort "Patch format detection failed."
 +		fi
  		;;
  	esac
  }
@@@ -327,7 -305,7 +329,7 @@@ d
  		git_apply_opt="$git_apply_opt $(sq "$1$2")"; shift ;;
  	--patch-format)
  		shift ; patch_format="$1" ;;
- 	--reject)
+ 	--reject|--ignore-whitespace|--ignore-space-change)
  		git_apply_opt="$git_apply_opt $1" ;;
  	--committer-date-is-author-date)
  		committer_date_is_author_date=t ;;
diff --combined git-rebase.sh
index 3555d17a5d,d741752c7c..2315d95a9f
--- a/git-rebase.sh
+++ b/git-rebase.sh
@@@ -333,6 -333,9 +333,9 @@@ d
  			;;
  		esac
  		;;
+ 	--ignore-whitespace)
+ 		git_am_opt="$git_am_opt $1"
+ 		;;
  	--committer-date-is-author-date|--ignore-date)
  		git_am_opt="$git_am_opt $1"
  		force_rebase=t
@@@ -382,10 -385,8 +385,10 @@@ els
  fi
  
  # The tree must be really really clean.
 -if ! git update-index --ignore-submodules --refresh; then
 -	die "cannot rebase: you have unstaged changes"
 +if ! git update-index --ignore-submodules --refresh > /dev/null; then
 +	echo >&2 "cannot rebase: you have unstaged changes"
 +	git diff --name-status -r --ignore-submodules -- >&2
 +	exit 1
  fi
  diff=$(git diff-index --cached --name-status -r --ignore-submodules HEAD --)
  case "$diff" in