combine-diff.con commit combine-diff: better hunk splitting. (3ec1909)
   1#include "cache.h"
   2#include "commit.h"
   3#include "diff.h"
   4#include "diffcore.h"
   5#include "quote.h"
   6
   7struct path_list {
   8        struct path_list *next;
   9        int len;
  10        char *path;
  11        unsigned char sha1[20];
  12        unsigned char parent_sha1[FLEX_ARRAY][20];
  13};
  14
  15static int uninteresting(struct diff_filepair *p)
  16{
  17        if (diff_unmodified_pair(p))
  18                return 1;
  19        if (!S_ISREG(p->one->mode) || !S_ISREG(p->two->mode))
  20                return 1;
  21        return 0;
  22}
  23
  24static struct path_list *intersect_paths(struct path_list *curr,
  25                                         int n, int num_parent)
  26{
  27        struct diff_queue_struct *q = &diff_queued_diff;
  28        struct path_list *p;
  29        int i;
  30
  31        if (!n) {
  32                struct path_list *list = NULL, **tail = &list;
  33                for (i = 0; i < q->nr; i++) {
  34                        int len;
  35                        const char *path;
  36                        if (uninteresting(q->queue[i]))
  37                                continue;
  38                        path = q->queue[i]->two->path;
  39                        len = strlen(path);
  40
  41                        p = xmalloc(sizeof(*p) + len + 1 + num_parent * 20);
  42                        p->path = (char*) &(p->parent_sha1[num_parent][0]);
  43                        memcpy(p->path, path, len);
  44                        p->path[len] = 0;
  45                        p->len = len;
  46                        p->next = NULL;
  47                        memcpy(p->sha1, q->queue[i]->two->sha1, 20);
  48                        memcpy(p->parent_sha1[n], q->queue[i]->one->sha1, 20);
  49                        *tail = p;
  50                        tail = &p->next;
  51                }
  52                return list;
  53        }
  54
  55        for (p = curr; p; p = p->next) {
  56                int found = 0;
  57                if (!p->len)
  58                        continue;
  59                for (i = 0; i < q->nr; i++) {
  60                        const char *path;
  61                        int len;
  62
  63                        if (uninteresting(q->queue[i]))
  64                                continue;
  65                        path = q->queue[i]->two->path;
  66                        len = strlen(path);
  67                        if (len == p->len && !memcmp(path, p->path, len)) {
  68                                found = 1;
  69                                memcpy(p->parent_sha1[n],
  70                                       q->queue[i]->one->sha1, 20);
  71                                break;
  72                        }
  73                }
  74                if (!found)
  75                        p->len = 0;
  76        }
  77        return curr;
  78}
  79
  80struct lline {
  81        struct lline *next;
  82        int len;
  83        unsigned long parent_map;
  84        char line[FLEX_ARRAY];
  85};
  86
  87struct sline {
  88        struct lline *lost_head, **lost_tail;
  89        char *bol;
  90        int len;
  91        unsigned long flag;
  92};
  93
  94static char *grab_blob(const unsigned char *sha1, unsigned long *size)
  95{
  96        char *blob;
  97        char type[20];
  98        if (!memcmp(sha1, null_sha1, 20)) {
  99                /* deleted blob */
 100                *size = 0;
 101                return xcalloc(1, 1);
 102        }
 103        blob = read_sha1_file(sha1, type, size);
 104        if (strcmp(type, "blob"))
 105                die("object '%s' is not a blob!", sha1_to_hex(sha1));
 106        return blob;
 107}
 108
 109#define TMPPATHLEN 50
 110#define MAXLINELEN 10240
 111
 112static void write_to_temp_file(char *tmpfile, void *blob, unsigned long size)
 113{
 114        int fd = git_mkstemp(tmpfile, TMPPATHLEN, ".diff_XXXXXX");
 115        if (fd < 0)
 116                die("unable to create temp-file");
 117        if (write(fd, blob, size) != size)
 118                die("unable to write temp-file");
 119        close(fd);
 120}
 121
 122static void write_temp_blob(char *tmpfile, const unsigned char *sha1)
 123{
 124        unsigned long size;
 125        void *blob;
 126        blob = grab_blob(sha1, &size);
 127        write_to_temp_file(tmpfile, blob, size);
 128        free(blob);
 129}
 130
 131static int parse_num(char **cp_p, unsigned int *num_p)
 132{
 133        char *cp = *cp_p;
 134        unsigned int num = 0;
 135        int read_some;
 136
 137        while ('0' <= *cp && *cp <= '9')
 138                num = num * 10 + *cp++ - '0';
 139        if (!(read_some = cp - *cp_p))
 140                return -1;
 141        *cp_p = cp;
 142        *num_p = num;
 143        return 0;
 144}
 145
 146static int parse_hunk_header(char *line, int len,
 147                             unsigned int *ob, unsigned int *on,
 148                             unsigned int *nb, unsigned int *nn)
 149{
 150        char *cp;
 151        cp = line + 4;
 152        if (parse_num(&cp, ob)) {
 153        bad_line:
 154                return error("malformed diff output: %s", line);
 155        }
 156        if (*cp == ',') {
 157                cp++;
 158                if (parse_num(&cp, on))
 159                        goto bad_line;
 160        }
 161        else
 162                *on = 1;
 163        if (*cp++ != ' ' || *cp++ != '+')
 164                goto bad_line;
 165        if (parse_num(&cp, nb))
 166                goto bad_line;
 167        if (*cp == ',') {
 168                cp++;
 169                if (parse_num(&cp, nn))
 170                        goto bad_line;
 171        }
 172        else
 173                *nn = 1;
 174        return -!!memcmp(cp, " @@", 3);
 175}
 176
 177static void append_lost(struct sline *sline, int n, const char *line)
 178{
 179        struct lline *lline;
 180        int len = strlen(line);
 181        unsigned long this_mask = (1UL<<n);
 182        if (line[len-1] == '\n')
 183                len--;
 184
 185        /* Check to see if we can squash things */
 186        if (sline->lost_head) {
 187                struct lline *last_one = NULL;
 188                /* We cannot squash it with earlier one */
 189                for (lline = sline->lost_head;
 190                     lline;
 191                     lline = lline->next)
 192                        if (lline->parent_map & this_mask)
 193                                last_one = lline;
 194                lline = last_one ? last_one->next : sline->lost_head;
 195                while (lline) {
 196                        if (lline->len == len &&
 197                            !memcmp(lline->line, line, len)) {
 198                                lline->parent_map |= this_mask;
 199                                return;
 200                        }
 201                        lline = lline->next;
 202                }
 203        }
 204
 205        lline = xmalloc(sizeof(*lline) + len + 1);
 206        lline->len = len;
 207        lline->next = NULL;
 208        lline->parent_map = this_mask;
 209        memcpy(lline->line, line, len);
 210        lline->line[len] = 0;
 211        *sline->lost_tail = lline;
 212        sline->lost_tail = &lline->next;
 213}
 214
 215static void combine_diff(const unsigned char *parent, const char *ourtmp,
 216                         struct sline *sline, int cnt, int n)
 217{
 218        FILE *in;
 219        char parent_tmp[TMPPATHLEN];
 220        char cmd[TMPPATHLEN * 2 + 1024];
 221        char line[MAXLINELEN];
 222        unsigned int lno, ob, on, nb, nn;
 223        unsigned long pmask = ~(1UL << n);
 224        struct sline *lost_bucket = NULL;
 225
 226        write_temp_blob(parent_tmp, parent);
 227        sprintf(cmd, "diff --unified=0 -La/x -Lb/x '%s' '%s'",
 228                parent_tmp, ourtmp);
 229        in = popen(cmd, "r");
 230        if (!in)
 231                return;
 232
 233        lno = 1;
 234        while (fgets(line, sizeof(line), in) != NULL) {
 235                int len = strlen(line);
 236                if (5 < len && !memcmp("@@ -", line, 4)) {
 237                        if (parse_hunk_header(line, len,
 238                                              &ob, &on, &nb, &nn))
 239                                break;
 240                        lno = nb;
 241                        if (!nb) {
 242                                /* @@ -1,2 +0,0 @@ to remove the
 243                                 * first two lines...
 244                                 */
 245                                nb = 1;
 246                        }
 247                        lost_bucket = &sline[nb-1]; /* sline is 0 based */
 248                        continue;
 249                }
 250                if (!lost_bucket)
 251                        continue;
 252                switch (line[0]) {
 253                case '-':
 254                        append_lost(lost_bucket, n, line+1);
 255                        break;
 256                case '+':
 257                        sline[lno-1].flag &= pmask;
 258                        lno++;
 259                        break;
 260                }
 261        }
 262        fclose(in);
 263        unlink(parent_tmp);
 264}
 265
 266static unsigned long context = 3;
 267static char combine_marker = '@';
 268
 269static int interesting(struct sline *sline, unsigned long all_mask)
 270{
 271        return ((sline->flag & all_mask) != all_mask || sline->lost_head);
 272}
 273
 274static unsigned long line_common_diff(struct sline *sline, unsigned long all_mask)
 275{
 276        /*
 277         * Look at the line and see from which parents we have the
 278         * same difference.
 279         */
 280
 281        /* Lower bits of sline->flag records if the parent had this
 282         * line, so XOR with all_mask gives us on-bits for parents we
 283         * have differences with.
 284         */
 285        unsigned long common_adds = (sline->flag ^ all_mask) & all_mask;
 286        unsigned long common_removes = all_mask;
 287
 288        /* If all the parents have this line, that also counts as
 289         * having the same difference.
 290         */
 291        if (!common_adds)
 292                common_adds = all_mask;
 293
 294        if (sline->lost_head) {
 295                /* Lost head list records the lines removed from
 296                 * the parents, and parent_map records from which
 297                 * parent the line was removed.
 298                 */
 299                struct lline *ll;
 300                for (ll = sline->lost_head; ll; ll = ll->next) {
 301                        common_removes &= ll->parent_map;
 302                }
 303        }
 304        return common_adds & common_removes;
 305}
 306
 307static unsigned long line_all_diff(struct sline *sline, unsigned long all_mask)
 308{
 309        /*
 310         * Look at the line and see from which parents we have some difference.
 311         */
 312        unsigned long different = (sline->flag ^ all_mask) & all_mask;
 313        if (sline->lost_head) {
 314                /* Lost head list records the lines removed from
 315                 * the parents, and parent_map records from which
 316                 * parent the line was removed.
 317                 */
 318                struct lline *ll;
 319                for (ll = sline->lost_head; ll; ll = ll->next) {
 320                        different |= ll->parent_map;
 321                }
 322        }
 323        return different;
 324}
 325
 326static unsigned long adjust_hunk_tail(struct sline *sline,
 327                                      unsigned long all_mask,
 328                                      unsigned long hunk_begin,
 329                                      unsigned long i)
 330{
 331        /* i points at the first uninteresting line.
 332         * If the last line of the hunk was interesting
 333         * only because it has some deletion, then
 334         * it is not all that interesting for the
 335         * purpose of giving trailing context lines.
 336         */
 337        if ((hunk_begin + 1 <= i) &&
 338            ((sline[i-1].flag & all_mask) == all_mask))
 339                i--;
 340        return i;
 341}
 342
 343static unsigned long next_interesting(struct sline *sline,
 344                                      unsigned long mark,
 345                                      unsigned long i,
 346                                      unsigned long cnt,
 347                                      int uninteresting)
 348{
 349        while (i < cnt)
 350                if (uninteresting ?
 351                    !(sline[i].flag & mark) :
 352                    (sline[i].flag & mark))
 353                        return i;
 354                else
 355                        i++;
 356        return cnt;
 357}
 358
 359static int give_context(struct sline *sline, unsigned long cnt, int num_parent)
 360{
 361        unsigned long all_mask = (1UL<<num_parent) - 1;
 362        unsigned long mark = (1UL<<num_parent);
 363        unsigned long i;
 364
 365        i = next_interesting(sline, mark, 0, cnt, 0);
 366        if (cnt <= i)
 367                return 0;
 368
 369        while (i < cnt) {
 370                unsigned long j = (context < i) ? (i - context) : 0;
 371                unsigned long k;
 372                while (j < i)
 373                        sline[j++].flag |= mark;
 374
 375        again:
 376                j = next_interesting(sline, mark, i, cnt, 1);
 377                if (cnt <= j)
 378                        break; /* the rest are all interesting */
 379
 380                /* lookahead context lines */
 381                k = next_interesting(sline, mark, j, cnt, 0);
 382                j = adjust_hunk_tail(sline, all_mask, i, j);
 383
 384                if (k < j + context) {
 385                        /* k is interesting and [j,k) are not, but
 386                         * paint them interesting because the gap is small.
 387                         */
 388                        while (j < k)
 389                                sline[j++].flag |= mark;
 390                        i = k;
 391                        goto again;
 392                }
 393
 394                /* j is the first uninteresting line and there is
 395                 * no overlap beyond it within context lines.
 396                 */
 397                i = k;
 398                k = (j + context < cnt) ? j + context : cnt;
 399                while (j < k)
 400                        sline[j++].flag |= mark;
 401        }
 402        return 1;
 403}
 404
 405static int make_hunks(struct sline *sline, unsigned long cnt,
 406                       int num_parent, int dense)
 407{
 408        unsigned long all_mask = (1UL<<num_parent) - 1;
 409        unsigned long mark = (1UL<<num_parent);
 410        unsigned long i;
 411        int has_interesting = 0;
 412
 413        for (i = 0; i < cnt; i++) {
 414                if (interesting(&sline[i], all_mask))
 415                        sline[i].flag |= mark;
 416                else
 417                        sline[i].flag &= ~mark;
 418        }
 419        if (!dense)
 420                return give_context(sline, cnt, num_parent);
 421
 422        /* Look at each hunk, and if we have changes from only one
 423         * parent, or the changes are the same from all but one
 424         * parent, mark that uninteresting.
 425         */
 426        i = 0;
 427        while (i < cnt) {
 428                unsigned long j, hunk_begin, hunk_end;
 429                int same, diff;
 430                unsigned long same_diff, all_diff;
 431                while (i < cnt && !(sline[i].flag & mark))
 432                        i++;
 433                if (cnt <= i)
 434                        break; /* No more interesting hunks */
 435                hunk_begin = i;
 436                for (j = i + 1; j < cnt; j++) {
 437                        if (!(sline[j].flag & mark)) {
 438                                /* Look beyond the end to see if there
 439                                 * is an interesting line after this
 440                                 * hunk within context span.
 441                                 */
 442                                unsigned long la; /* lookahead */
 443                                int contin = 0;
 444                                la = adjust_hunk_tail(sline, all_mask,
 445                                                     hunk_begin, j);
 446                                la = (la + context < cnt) ?
 447                                        (la + context) : cnt;
 448                                while (j <= --la) {
 449                                        if (sline[la].flag & mark) {
 450                                                contin = 1;
 451                                                break;
 452                                        }
 453                                }
 454                                if (!contin)
 455                                        break;
 456                                j = la;
 457                        }
 458                }
 459                hunk_end = j;
 460
 461                /* [i..hunk_end) are interesting.  Now does it have
 462                 * the same change with all but one parent?
 463                 */
 464                same_diff = all_mask;
 465                all_diff = 0;
 466                for (j = i; j < hunk_end; j++) {
 467                        same_diff &= line_common_diff(sline + j, all_mask);
 468                        all_diff |= line_all_diff(sline + j, all_mask);
 469                }
 470                diff = same = 0;
 471                for (j = 0; j < num_parent; j++) {
 472                        if (same_diff & (1UL<<j))
 473                                same++;
 474                        if (all_diff & (1UL<<j))
 475                                diff++;
 476                }
 477                if ((num_parent - 1 <= same) || (diff == 1)) {
 478                        /* This hunk is not that interesting after all */
 479                        for (j = hunk_begin; j < hunk_end; j++)
 480                                sline[j].flag &= ~mark;
 481                }
 482                i = hunk_end;
 483        }
 484
 485        has_interesting = give_context(sline, cnt, num_parent);
 486        return has_interesting;
 487}
 488
 489static void dump_sline(struct sline *sline, int cnt, int num_parent)
 490{
 491        unsigned long mark = (1UL<<num_parent);
 492        int i;
 493        int lno = 0;
 494
 495        while (1) {
 496                struct sline *sl = &sline[lno];
 497                int hunk_end;
 498                while (lno < cnt && !(sline[lno].flag & mark))
 499                        lno++;
 500                if (cnt <= lno)
 501                        break;
 502                for (hunk_end = lno + 1; hunk_end < cnt; hunk_end++)
 503                        if (!(sline[hunk_end].flag & mark))
 504                                break;
 505                for (i = 0; i <= num_parent; i++) putchar(combine_marker);
 506                printf(" +%d,%d ", lno+1, hunk_end-lno);
 507                for (i = 0; i <= num_parent; i++) putchar(combine_marker);
 508                putchar('\n');
 509                while (lno < hunk_end) {
 510                        struct lline *ll;
 511                        int j;
 512                        sl = &sline[lno++];
 513                        ll = sl->lost_head;
 514                        while (ll) {
 515                                for (j = 0; j < num_parent; j++) {
 516                                        if (ll->parent_map & (1UL<<j))
 517                                                putchar('-');
 518                                        else
 519                                                putchar(' ');
 520                                }
 521                                puts(ll->line);
 522                                ll = ll->next;
 523                        }
 524                        for (j = 0; j < num_parent; j++) {
 525                                if ((1UL<<j) & sl->flag)
 526                                        putchar(' ');
 527                                else
 528                                        putchar('+');
 529                        }
 530                        printf("%.*s\n", sl->len, sl->bol);
 531                }
 532        }
 533}
 534
 535static int show_combined_diff(struct path_list *elem, int num_parent,
 536                              int dense, const char *header, int show_empty)
 537{
 538        unsigned long size, cnt, lno;
 539        char *result, *cp, *ep;
 540        struct sline *sline; /* survived lines */
 541        int i, show_hunks, shown_header = 0;
 542        char ourtmp[TMPPATHLEN];
 543
 544        /* Read the result of merge first */
 545        result = grab_blob(elem->sha1, &size);
 546        write_to_temp_file(ourtmp, result, size);
 547
 548        for (cnt = 0, cp = result; cp - result < size; cp++) {
 549                if (*cp == '\n')
 550                        cnt++;
 551        }
 552        if (result[size-1] != '\n')
 553                cnt++; /* incomplete line */
 554
 555        sline = xcalloc(cnt, sizeof(*sline));
 556        ep = result;
 557        sline[0].bol = result;
 558        for (lno = 0, cp = result; cp - result < size; cp++) {
 559                if (*cp == '\n') {
 560                        sline[lno].lost_tail = &sline[lno].lost_head;
 561                        sline[lno].len = cp - sline[lno].bol;
 562                        sline[lno].flag = (1UL<<num_parent) - 1;
 563                        lno++;
 564                        if (lno < cnt)
 565                                sline[lno].bol = cp + 1;
 566                }
 567        }
 568        if (result[size-1] != '\n') {
 569                sline[cnt-1].lost_tail = &sline[cnt-1].lost_head;
 570                sline[cnt-1].len = size - (sline[cnt-1].bol - result);
 571                sline[cnt-1].flag = (1UL<<num_parent) - 1;
 572        }
 573
 574        for (i = 0; i < num_parent; i++)
 575                combine_diff(elem->parent_sha1[i], ourtmp, sline, cnt, i);
 576
 577        show_hunks = make_hunks(sline, cnt, num_parent, dense);
 578
 579        if (header && (show_hunks || show_empty)) {
 580                shown_header++;
 581                puts(header);
 582        }
 583        if (show_hunks) {
 584                printf("diff --%s ", dense ? "cc" : "combined");
 585                if (quote_c_style(elem->path, NULL, NULL, 0))
 586                        quote_c_style(elem->path, NULL, stdout, 0);
 587                else
 588                        printf("%s", elem->path);
 589                putchar('\n');
 590                dump_sline(sline, cnt, num_parent);
 591        }
 592        unlink(ourtmp);
 593        free(result);
 594
 595        for (i = 0; i < cnt; i++) {
 596                if (sline[i].lost_head) {
 597                        struct lline *ll = sline[i].lost_head;
 598                        while (ll) {
 599                                struct lline *tmp = ll;
 600                                ll = ll->next;
 601                                free(tmp);
 602                        }
 603                }
 604        }
 605        free(sline);
 606        return shown_header;
 607}
 608
 609int diff_tree_combined_merge(const unsigned char *sha1,
 610                             const char *header,
 611                             int show_empty_merge, int dense)
 612{
 613        struct commit *commit = lookup_commit(sha1);
 614        struct diff_options diffopts;
 615        struct commit_list *parents;
 616        struct path_list *p, *paths = NULL;
 617        int num_parent, i, num_paths;
 618
 619        diff_setup(&diffopts);
 620        diffopts.output_format = DIFF_FORMAT_NO_OUTPUT;
 621        diffopts.recursive = 1;
 622
 623        /* count parents */
 624        for (parents = commit->parents, num_parent = 0;
 625             parents;
 626             parents = parents->next, num_parent++)
 627                ; /* nothing */
 628
 629        /* find set of paths that everybody touches */
 630        for (parents = commit->parents, i = 0;
 631             parents;
 632             parents = parents->next, i++) {
 633                struct commit *parent = parents->item;
 634                diff_tree_sha1(parent->object.sha1, commit->object.sha1, "",
 635                               &diffopts);
 636                paths = intersect_paths(paths, i, num_parent);
 637                diff_flush(&diffopts);
 638        }
 639
 640        /* find out surviving paths */
 641        for (num_paths = 0, p = paths; p; p = p->next) {
 642                if (p->len)
 643                        num_paths++;
 644        }
 645        if (num_paths || show_empty_merge) {
 646                for (p = paths; p; p = p->next) {
 647                        if (!p->len)
 648                                continue;
 649                        if (show_combined_diff(p, num_parent, dense, header,
 650                                               show_empty_merge))
 651                                header = NULL;
 652                }
 653        }
 654
 655        /* Clean things up */
 656        while (paths) {
 657                struct path_list *tmp = paths;
 658                paths = paths->next;
 659                free(tmp);
 660        }
 661        return 0;
 662}