diff.c: buffer all output if asked to
[gitweb.git] / diff.c
diff --git a/diff.c b/diff.c
index 2db0d7c0f5c63c2e764d88cd04b1f5c335df20fe..31720abf8f9cc6ce8e62de9a67afb54d8e20a7a8 100644 (file)
--- a/diff.c
+++ b/diff.c
@@ -605,6 +605,47 @@ enum diff_symbol {
 #define DIFF_SYMBOL_CONTENT_BLANK_LINE_EOF (1<<16)
 #define DIFF_SYMBOL_CONTENT_WS_MASK (WSEH_NEW | WSEH_OLD | WSEH_CONTEXT | WS_RULE_MASK)
 
+/*
+ * This struct is used when we need to buffer the output of the diff output.
+ *
+ * NEEDSWORK: Instead of storing a copy of the line, add an offset pointer
+ * into the pre/post image file. This pointer could be a union with the
+ * line pointer. By storing an offset into the file instead of the literal line,
+ * we can decrease the memory footprint for the buffered output. At first we
+ * may want to only have indirection for the content lines, but we could also
+ * enhance the state for emitting prefabricated lines, e.g. the similarity
+ * score line or hunk/file headers would only need to store a number or path
+ * and then the output can be constructed later on depending on state.
+ */
+struct emitted_diff_symbol {
+       const char *line;
+       int len;
+       int flags;
+       enum diff_symbol s;
+};
+#define EMITTED_DIFF_SYMBOL_INIT {NULL}
+
+struct emitted_diff_symbols {
+       struct emitted_diff_symbol *buf;
+       int nr, alloc;
+};
+#define EMITTED_DIFF_SYMBOLS_INIT {NULL, 0, 0}
+
+static void append_emitted_diff_symbol(struct diff_options *o,
+                                      struct emitted_diff_symbol *e)
+{
+       struct emitted_diff_symbol *f;
+
+       ALLOC_GROW(o->emitted_symbols->buf,
+                  o->emitted_symbols->nr + 1,
+                  o->emitted_symbols->alloc);
+       f = &o->emitted_symbols->buf[o->emitted_symbols->nr++];
+
+       memcpy(f, e, sizeof(struct emitted_diff_symbol));
+       f->line = e->line ? xmemdupz(e->line, e->len) : NULL;
+}
+
+
 static void emit_line_ws_markup(struct diff_options *o,
                                const char *set, const char *reset,
                                const char *line, int len, char sign,
@@ -631,12 +672,18 @@ static void emit_line_ws_markup(struct diff_options *o,
        }
 }
 
-static void emit_diff_symbol(struct diff_options *o, enum diff_symbol s,
-                            const char *line, int len, unsigned flags)
+static void emit_diff_symbol_from_struct(struct diff_options *o,
+                                        struct emitted_diff_symbol *eds)
 {
        static const char *nneof = " No newline at end of file\n";
        const char *context, *reset, *set, *meta, *fraginfo;
        struct strbuf sb = STRBUF_INIT;
+
+       enum diff_symbol s = eds->s;
+       const char *line = eds->line;
+       int len = eds->len;
+       unsigned flags = eds->flags;
+
        switch (s) {
        case DIFF_SYMBOL_NO_LF_EOF:
                context = diff_get_color_opt(o, DIFF_CONTEXT);
@@ -778,6 +825,17 @@ static void emit_diff_symbol(struct diff_options *o, enum diff_symbol s,
        strbuf_release(&sb);
 }
 
+static void emit_diff_symbol(struct diff_options *o, enum diff_symbol s,
+                            const char *line, int len, unsigned flags)
+{
+       struct emitted_diff_symbol e = {line, len, flags, s};
+
+       if (o->emitted_symbols)
+               append_emitted_diff_symbol(o, &e);
+       else
+               emit_diff_symbol_from_struct(o, &e);
+}
+
 void diff_emit_submodule_del(struct diff_options *o, const char *line)
 {
        emit_diff_symbol(o, DIFF_SYMBOL_SUBMODULE_DEL, line, strlen(line), 0);
@@ -1374,9 +1432,29 @@ static void diff_words_show(struct diff_words_data *diff_words)
 /* In "color-words" mode, show word-diff of words accumulated in the buffer */
 static void diff_words_flush(struct emit_callback *ecbdata)
 {
+       struct diff_options *wo = ecbdata->diff_words->opt;
+
        if (ecbdata->diff_words->minus.text.size ||
            ecbdata->diff_words->plus.text.size)
                diff_words_show(ecbdata->diff_words);
+
+       if (wo->emitted_symbols) {
+               struct diff_options *o = ecbdata->opt;
+               struct emitted_diff_symbols *wol = wo->emitted_symbols;
+               int i;
+
+               /*
+                * NEEDSWORK:
+                * Instead of appending each, concat all words to a line?
+                */
+               for (i = 0; i < wol->nr; i++)
+                       append_emitted_diff_symbol(o, &wol->buf[i]);
+
+               for (i = 0; i < wol->nr; i++)
+                       free((void *)wol->buf[i].line);
+
+               wol->nr = 0;
+       }
 }
 
 static void diff_filespec_load_driver(struct diff_filespec *one)
@@ -1412,6 +1490,11 @@ static void init_diff_words_data(struct emit_callback *ecbdata,
                xcalloc(1, sizeof(struct diff_words_data));
        ecbdata->diff_words->type = o->word_diff;
        ecbdata->diff_words->opt = o;
+
+       if (orig_opts->emitted_symbols)
+               o->emitted_symbols =
+                       xcalloc(1, sizeof(struct emitted_diff_symbols));
+
        if (!o->word_regex)
                o->word_regex = userdiff_word_regex(one);
        if (!o->word_regex)
@@ -1446,6 +1529,7 @@ static void free_diff_words_data(struct emit_callback *ecbdata)
 {
        if (ecbdata->diff_words) {
                diff_words_flush(ecbdata);
+               free (ecbdata->diff_words->opt->emitted_symbols);
                free (ecbdata->diff_words->opt);
                free (ecbdata->diff_words->minus.text.ptr);
                free (ecbdata->diff_words->minus.orig);
@@ -4996,16 +5080,37 @@ void diff_warn_rename_limit(const char *varname, int needed, int degraded_cc)
 static void diff_flush_patch_all_file_pairs(struct diff_options *o)
 {
        int i;
+       static struct emitted_diff_symbols esm = EMITTED_DIFF_SYMBOLS_INIT;
        struct diff_queue_struct *q = &diff_queued_diff;
 
        if (WSEH_NEW & WS_RULE_MASK)
                die("BUG: WS rules bit mask overlaps with diff symbol flags");
 
+       /*
+        * For testing purposes we want to make sure the diff machinery
+        * works completely with the buffer. If there is anything emitted
+        * outside the emit_string, then the order is screwed
+        * up and the tests will fail.
+        *
+        * TODO (later in this series):
+        * We'll unset this pointer in a later patch.
+        */
+       o->emitted_symbols = &esm;
+
        for (i = 0; i < q->nr; i++) {
                struct diff_filepair *p = q->queue[i];
                if (check_pair_status(p))
                        diff_flush_patch(p, o);
        }
+
+       if (o->emitted_symbols) {
+               for (i = 0; i < esm.nr; i++)
+                       emit_diff_symbol_from_struct(o, &esm.buf[i]);
+
+               for (i = 0; i < esm.nr; i++)
+                       free((void *)esm.buf[i].line);
+       }
+       esm.nr = 0;
 }
 
 void diff_flush(struct diff_options *options)