From e29e1147e485654d90a0ea0fd5fb7151bb194265 Mon Sep 17 00:00:00 2001
From: Junio C Hamano <junkio@cox.net>
Date: Tue, 28 Feb 2006 20:16:41 -0800
Subject: [PATCH] diffcore-delta: stop using deltifier for packing.

This switches the change estimation logic used by break, rename
and copy detection from delta packing code to a more line
oriented one.  This way, thee performance-density tradeoff by
delta packing code can be made without worrying about breaking
the rename detection.

Signed-off-by: Junio C Hamano <junkio@cox.net>
---
 diffcore-delta.c | 141 +++++++++++++++++++++++++++++++++++++----------
 1 file changed, 113 insertions(+), 28 deletions(-)

diff --git a/diffcore-delta.c b/diffcore-delta.c
index 1e6a6911ec..d03787be65 100644
--- a/diffcore-delta.c
+++ b/diffcore-delta.c
@@ -1,43 +1,128 @@
 #include "cache.h"
 #include "diff.h"
 #include "diffcore.h"
-#include "delta.h"
-#include "count-delta.h"
-
-static int diffcore_count_changes_1(void *src, unsigned long src_size,
-				    void *dst, unsigned long dst_size,
-				    unsigned long delta_limit,
-				    unsigned long *src_copied,
-				    unsigned long *literal_added)
-{
-	void *delta;
-	unsigned long delta_size;
-
-	delta = diff_delta(src, src_size,
-			   dst, dst_size,
-			   &delta_size, delta_limit);
-	if (!delta)
-		/* If delta_limit is exceeded, we have too much differences */
-		return -1;
 
-	/* Estimate the edit size by interpreting delta. */
-	if (count_delta(delta, delta_size, src_copied, literal_added)) {
-		free(delta);
-		return -1;
+struct linehash {
+	unsigned long bytes;
+	unsigned long hash;
+};
+
+static unsigned long hash_extended_line(const unsigned char **buf_p,
+					unsigned long left)
+{
+	/* An extended line is zero or more whitespace letters (including LF)
+	 * followed by one non whitespace letter followed by zero or more
+	 * non LF, and terminated with by a LF (or EOF).
+	 */
+	const unsigned char *bol = *buf_p;
+	const unsigned char *buf = bol;
+	unsigned long hashval = 0;
+	while (left) {
+		unsigned c = *buf++;
+		if (!c)
+			goto binary;
+		left--;
+		if (' ' < c) {
+			hashval = c;
+			break;
+		}
+	}
+	while (left) {
+		unsigned c = *buf++;
+		if (!c)
+			goto binary;
+		left--;
+		if (c == '\n')
+			break;
+		if (' ' < c)
+			hashval = hashval * 11 + c;
 	}
-	free(delta);
+	*buf_p = buf;
+	return hashval;
+
+ binary:
+	*buf_p = NULL;
+	return 0;
+}
+
+static int linehash_compare(const void *a_, const void *b_)
+{
+	struct linehash *a = (struct linehash *) a_;
+	struct linehash *b = (struct linehash *) b_;
+	if (a->hash < b->hash) return -1;
+	if (a->hash > b->hash) return 1;
 	return 0;
 }
 
+static struct linehash *hash_lines(const unsigned char *buf,
+				   unsigned long size)
+{
+	const unsigned char *eobuf = buf + size;
+	struct linehash *line = NULL;
+	int alloc = 0, used = 0;
+
+	while (buf < eobuf) {
+		const unsigned char *ptr = buf;
+		unsigned long hash = hash_extended_line(&buf, eobuf-ptr);
+		if (!buf) {
+			free(line);
+			return NULL;
+		}
+		if (alloc <= used) {
+			alloc = alloc_nr(alloc);
+			line = xrealloc(line, sizeof(*line) * alloc);
+		}
+		line[used].bytes = buf - ptr;
+		line[used].hash = hash;
+		used++;
+	}
+	qsort(line, used, sizeof(*line), linehash_compare);
+
+	/* Terminate the list */
+	if (alloc <= used)
+		line = xrealloc(line, sizeof(*line) * (used+1));
+	line[used].bytes = line[used].hash = 0;
+	return line;
+}
+
 int diffcore_count_changes(void *src, unsigned long src_size,
 			   void *dst, unsigned long dst_size,
 			   unsigned long delta_limit,
 			   unsigned long *src_copied,
 			   unsigned long *literal_added)
 {
-	return diffcore_count_changes_1(src, src_size,
-					dst, dst_size,
-					delta_limit,
-					src_copied,
-					literal_added);
+	struct linehash *src_lines, *dst_lines;
+	unsigned long sc, la;
+
+	src_lines = hash_lines(src, src_size);
+	if (!src_lines)
+		return -1;
+	dst_lines = hash_lines(dst, dst_size);
+	if (!dst_lines) {
+		free(src_lines);
+		return -1;
+	}
+	sc = la = 0;
+	while (src_lines->bytes && dst_lines->bytes) {
+		int cmp = linehash_compare(src_lines, dst_lines);
+		if (!cmp) {
+			sc += src_lines->bytes;
+			src_lines++;
+			dst_lines++;
+			continue;
+		}
+		if (cmp < 0) {
+			src_lines++;
+			continue;
+		}
+		la += dst_lines->bytes;
+		dst_lines++;
+	}
+	while (dst_lines->bytes) {
+		la += dst_lines->bytes;
+		dst_lines++;
+	}
+	*src_copied = sc;
+	*literal_added = la;
+	return 0;
 }
-- 
2.48.1