71883a4b7d06ddf2b4df6e7a731287013ed34b6d
   1#include "cache.h"
   2#include "range-diff.h"
   3#include "string-list.h"
   4#include "run-command.h"
   5#include "argv-array.h"
   6#include "hashmap.h"
   7#include "xdiff-interface.h"
   8#include "linear-assignment.h"
   9#include "diffcore.h"
  10
  11struct patch_util {
  12        /* For the search for an exact match */
  13        struct hashmap_entry e;
  14        const char *diff, *patch;
  15
  16        int i, shown;
  17        int diffsize;
  18        size_t diff_offset;
  19        /* the index of the matching item in the other branch, or -1 */
  20        int matching;
  21        struct object_id oid;
  22};
  23
  24/*
  25 * Reads the patches into a string list, with the `util` field being populated
  26 * as struct object_id (will need to be free()d).
  27 */
  28static int read_patches(const char *range, struct string_list *list)
  29{
  30        struct child_process cp = CHILD_PROCESS_INIT;
  31        FILE *in;
  32        struct strbuf buf = STRBUF_INIT, line = STRBUF_INIT;
  33        struct patch_util *util = NULL;
  34        int in_header = 1;
  35
  36        argv_array_pushl(&cp.args, "log", "--no-color", "-p", "--no-merges",
  37                        "--reverse", "--date-order", "--decorate=no",
  38                        "--no-abbrev-commit", range,
  39                        NULL);
  40        cp.out = -1;
  41        cp.no_stdin = 1;
  42        cp.git_cmd = 1;
  43
  44        if (start_command(&cp))
  45                return error_errno(_("could not start `log`"));
  46        in = fdopen(cp.out, "r");
  47        if (!in) {
  48                error_errno(_("could not read `log` output"));
  49                finish_command(&cp);
  50                return -1;
  51        }
  52
  53        while (strbuf_getline(&line, in) != EOF) {
  54                const char *p;
  55
  56                if (skip_prefix(line.buf, "commit ", &p)) {
  57                        if (util) {
  58                                string_list_append(list, buf.buf)->util = util;
  59                                strbuf_reset(&buf);
  60                        }
  61                        util = xcalloc(sizeof(*util), 1);
  62                        if (get_oid(p, &util->oid)) {
  63                                error(_("could not parse commit '%s'"), p);
  64                                free(util);
  65                                string_list_clear(list, 1);
  66                                strbuf_release(&buf);
  67                                strbuf_release(&line);
  68                                fclose(in);
  69                                finish_command(&cp);
  70                                return -1;
  71                        }
  72                        util->matching = -1;
  73                        in_header = 1;
  74                        continue;
  75                }
  76
  77                if (starts_with(line.buf, "diff --git")) {
  78                        in_header = 0;
  79                        strbuf_addch(&buf, '\n');
  80                        if (!util->diff_offset)
  81                                util->diff_offset = buf.len;
  82                        strbuf_addbuf(&buf, &line);
  83                } else if (in_header) {
  84                        if (starts_with(line.buf, "Author: ")) {
  85                                strbuf_addbuf(&buf, &line);
  86                                strbuf_addstr(&buf, "\n\n");
  87                        } else if (starts_with(line.buf, "    ")) {
  88                                strbuf_addbuf(&buf, &line);
  89                                strbuf_addch(&buf, '\n');
  90                        }
  91                        continue;
  92                } else if (starts_with(line.buf, "@@ "))
  93                        strbuf_addstr(&buf, "@@");
  94                else if (!line.buf[0] || starts_with(line.buf, "index "))
  95                        /*
  96                         * A completely blank (not ' \n', which is context)
  97                         * line is not valid in a diff.  We skip it
  98                         * silently, because this neatly handles the blank
  99                         * separator line between commits in git-log
 100                         * output.
 101                         *
 102                         * We also want to ignore the diff's `index` lines
 103                         * because they contain exact blob hashes in which
 104                         * we are not interested.
 105                         */
 106                        continue;
 107                else
 108                        strbuf_addbuf(&buf, &line);
 109
 110                strbuf_addch(&buf, '\n');
 111                util->diffsize++;
 112        }
 113        fclose(in);
 114        strbuf_release(&line);
 115
 116        if (util)
 117                string_list_append(list, buf.buf)->util = util;
 118        strbuf_release(&buf);
 119
 120        if (finish_command(&cp))
 121                return -1;
 122
 123        return 0;
 124}
 125
 126static int patch_util_cmp(const void *dummy, const struct patch_util *a,
 127                     const struct patch_util *b, const char *keydata)
 128{
 129        return strcmp(a->diff, keydata ? keydata : b->diff);
 130}
 131
 132static void find_exact_matches(struct string_list *a, struct string_list *b)
 133{
 134        struct hashmap map;
 135        int i;
 136
 137        hashmap_init(&map, (hashmap_cmp_fn)patch_util_cmp, NULL, 0);
 138
 139        /* First, add the patches of a to a hash map */
 140        for (i = 0; i < a->nr; i++) {
 141                struct patch_util *util = a->items[i].util;
 142
 143                util->i = i;
 144                util->patch = a->items[i].string;
 145                util->diff = util->patch + util->diff_offset;
 146                hashmap_entry_init(util, strhash(util->diff));
 147                hashmap_add(&map, util);
 148        }
 149
 150        /* Now try to find exact matches in b */
 151        for (i = 0; i < b->nr; i++) {
 152                struct patch_util *util = b->items[i].util, *other;
 153
 154                util->i = i;
 155                util->patch = b->items[i].string;
 156                util->diff = util->patch + util->diff_offset;
 157                hashmap_entry_init(util, strhash(util->diff));
 158                other = hashmap_remove(&map, util, NULL);
 159                if (other) {
 160                        if (other->matching >= 0)
 161                                BUG("already assigned!");
 162
 163                        other->matching = i;
 164                        util->matching = other->i;
 165                }
 166        }
 167
 168        hashmap_free(&map, 0);
 169}
 170
 171static void diffsize_consume(void *data, char *line, unsigned long len)
 172{
 173        (*(int *)data)++;
 174}
 175
 176static int diffsize(const char *a, const char *b)
 177{
 178        xpparam_t pp = { 0 };
 179        xdemitconf_t cfg = { 0 };
 180        mmfile_t mf1, mf2;
 181        int count = 0;
 182
 183        mf1.ptr = (char *)a;
 184        mf1.size = strlen(a);
 185        mf2.ptr = (char *)b;
 186        mf2.size = strlen(b);
 187
 188        cfg.ctxlen = 3;
 189        if (!xdi_diff_outf(&mf1, &mf2, diffsize_consume, &count, &pp, &cfg))
 190                return count;
 191
 192        error(_("failed to generate diff"));
 193        return COST_MAX;
 194}
 195
 196static void get_correspondences(struct string_list *a, struct string_list *b,
 197                                int creation_factor)
 198{
 199        int n = a->nr + b->nr;
 200        int *cost, c, *a2b, *b2a;
 201        int i, j;
 202
 203        ALLOC_ARRAY(cost, st_mult(n, n));
 204        ALLOC_ARRAY(a2b, n);
 205        ALLOC_ARRAY(b2a, n);
 206
 207        for (i = 0; i < a->nr; i++) {
 208                struct patch_util *a_util = a->items[i].util;
 209
 210                for (j = 0; j < b->nr; j++) {
 211                        struct patch_util *b_util = b->items[j].util;
 212
 213                        if (a_util->matching == j)
 214                                c = 0;
 215                        else if (a_util->matching < 0 && b_util->matching < 0)
 216                                c = diffsize(a_util->diff, b_util->diff);
 217                        else
 218                                c = COST_MAX;
 219                        cost[i + n * j] = c;
 220                }
 221
 222                c = a_util->matching < 0 ?
 223                        a_util->diffsize * creation_factor / 100 : COST_MAX;
 224                for (j = b->nr; j < n; j++)
 225                        cost[i + n * j] = c;
 226        }
 227
 228        for (j = 0; j < b->nr; j++) {
 229                struct patch_util *util = b->items[j].util;
 230
 231                c = util->matching < 0 ?
 232                        util->diffsize * creation_factor / 100 : COST_MAX;
 233                for (i = a->nr; i < n; i++)
 234                        cost[i + n * j] = c;
 235        }
 236
 237        for (i = a->nr; i < n; i++)
 238                for (j = b->nr; j < n; j++)
 239                        cost[i + n * j] = 0;
 240
 241        compute_assignment(n, n, cost, a2b, b2a);
 242
 243        for (i = 0; i < a->nr; i++)
 244                if (a2b[i] >= 0 && a2b[i] < b->nr) {
 245                        struct patch_util *a_util = a->items[i].util;
 246                        struct patch_util *b_util = b->items[a2b[i]].util;
 247
 248                        a_util->matching = a2b[i];
 249                        b_util->matching = i;
 250                }
 251
 252        free(cost);
 253        free(a2b);
 254        free(b2a);
 255}
 256
 257static const char *short_oid(struct patch_util *util)
 258{
 259        return find_unique_abbrev(&util->oid, DEFAULT_ABBREV);
 260}
 261
 262static struct diff_filespec *get_filespec(const char *name, const char *p)
 263{
 264        struct diff_filespec *spec = alloc_filespec(name);
 265
 266        fill_filespec(spec, &null_oid, 0, 0644);
 267        spec->data = (char *)p;
 268        spec->size = strlen(p);
 269        spec->should_munmap = 0;
 270        spec->is_stdin = 1;
 271
 272        return spec;
 273}
 274
 275static void patch_diff(const char *a, const char *b,
 276                              struct diff_options *diffopt)
 277{
 278        diff_queue(&diff_queued_diff,
 279                   get_filespec("a", a), get_filespec("b", b));
 280
 281        diffcore_std(diffopt);
 282        diff_flush(diffopt);
 283}
 284
 285static void output(struct string_list *a, struct string_list *b,
 286                   struct diff_options *diffopt)
 287{
 288        int i = 0, j = 0;
 289
 290        /*
 291         * We assume the user is really more interested in the second argument
 292         * ("newer" version). To that end, we print the output in the order of
 293         * the RHS (the `b` parameter). To put the LHS (the `a` parameter)
 294         * commits that are no longer in the RHS into a good place, we place
 295         * them once we have shown all of their predecessors in the LHS.
 296         */
 297
 298        while (i < a->nr || j < b->nr) {
 299                struct patch_util *a_util, *b_util;
 300                a_util = i < a->nr ? a->items[i].util : NULL;
 301                b_util = j < b->nr ? b->items[j].util : NULL;
 302
 303                /* Skip all the already-shown commits from the LHS. */
 304                while (i < a->nr && a_util->shown)
 305                        a_util = ++i < a->nr ? a->items[i].util : NULL;
 306
 307                /* Show unmatched LHS commit whose predecessors were shown. */
 308                if (i < a->nr && a_util->matching < 0) {
 309                        printf("%d: %s < -: --------\n",
 310                               i + 1, short_oid(a_util));
 311                        i++;
 312                        continue;
 313                }
 314
 315                /* Show unmatched RHS commits. */
 316                while (j < b->nr && b_util->matching < 0) {
 317                        printf("-: -------- > %d: %s\n",
 318                               j + 1, short_oid(b_util));
 319                        b_util = ++j < b->nr ? b->items[j].util : NULL;
 320                }
 321
 322                /* Show matching LHS/RHS pair. */
 323                if (j < b->nr) {
 324                        a_util = a->items[b_util->matching].util;
 325                        printf("%d: %s ! %d: %s\n",
 326                               b_util->matching + 1, short_oid(a_util),
 327                               j + 1, short_oid(b_util));
 328                        if (!(diffopt->output_format & DIFF_FORMAT_NO_OUTPUT))
 329                                patch_diff(a->items[b_util->matching].string,
 330                                           b->items[j].string, diffopt);
 331                        a_util->shown = 1;
 332                        j++;
 333                }
 334        }
 335}
 336
 337int show_range_diff(const char *range1, const char *range2,
 338                    int creation_factor, struct diff_options *diffopt)
 339{
 340        int res = 0;
 341
 342        struct string_list branch1 = STRING_LIST_INIT_DUP;
 343        struct string_list branch2 = STRING_LIST_INIT_DUP;
 344
 345        if (read_patches(range1, &branch1))
 346                res = error(_("could not parse log for '%s'"), range1);
 347        if (!res && read_patches(range2, &branch2))
 348                res = error(_("could not parse log for '%s'"), range2);
 349
 350        if (!res) {
 351                find_exact_matches(&branch1, &branch2);
 352                get_correspondences(&branch1, &branch2, creation_factor);
 353                output(&branch1, &branch2, diffopt);
 354        }
 355
 356        string_list_clear(&branch1, 1);
 357        string_list_clear(&branch2, 1);
 358
 359        return res;
 360}