1/*
2 * LibXDiff by Davide Libenzi ( File Differential Library )
3 * Copyright (C) 2003-2006 Davide Libenzi, Johannes E. Schindelin
4 *
5 * This library is free software; you can redistribute it and/or
6 * modify it under the terms of the GNU Lesser General Public
7 * License as published by the Free Software Foundation; either
8 * version 2.1 of the License, or (at your option) any later version.
9 *
10 * This library is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 * Lesser General Public License for more details.
14 *
15 * You should have received a copy of the GNU Lesser General Public
16 * License along with this library; if not, see
17 * <http://www.gnu.org/licenses/>.
18 *
19 * Davide Libenzi <davidel@xmailserver.org>
20 *
21 */
22
23#include "xinclude.h"
24
25typedef struct s_xdmerge {
26 struct s_xdmerge *next;
27 /*
28 * 0 = conflict,
29 * 1 = no conflict, take first,
30 * 2 = no conflict, take second.
31 * 3 = no conflict, take both.
32 */
33 int mode;
34 /*
35 * These point at the respective postimages. E.g. <i1,chg1> is
36 * how side #1 wants to change the common ancestor; if there is no
37 * overlap, lines before i1 in the postimage of side #1 appear
38 * in the merge result as a region touched by neither side.
39 */
40 long i1, i2;
41 long chg1, chg2;
42 /*
43 * These point at the preimage; of course there is just one
44 * preimage, that is from the shared common ancestor.
45 */
46 long i0;
47 long chg0;
48} xdmerge_t;
49
50static int xdl_append_merge(xdmerge_t **merge, int mode,
51 long i0, long chg0,
52 long i1, long chg1,
53 long i2, long chg2)
54{
55 xdmerge_t *m = *merge;
56 if (m && (i1 <= m->i1 + m->chg1 || i2 <= m->i2 + m->chg2)) {
57 if (mode != m->mode)
58 m->mode = 0;
59 m->chg0 = i0 + chg0 - m->i0;
60 m->chg1 = i1 + chg1 - m->i1;
61 m->chg2 = i2 + chg2 - m->i2;
62 } else {
63 m = xdl_malloc(sizeof(xdmerge_t));
64 if (!m)
65 return -1;
66 m->next = NULL;
67 m->mode = mode;
68 m->i0 = i0;
69 m->chg0 = chg0;
70 m->i1 = i1;
71 m->chg1 = chg1;
72 m->i2 = i2;
73 m->chg2 = chg2;
74 if (*merge)
75 (*merge)->next = m;
76 *merge = m;
77 }
78 return 0;
79}
80
81static int xdl_cleanup_merge(xdmerge_t *c)
82{
83 int count = 0;
84 xdmerge_t *next_c;
85
86 /* were there conflicts? */
87 for (; c; c = next_c) {
88 if (c->mode == 0)
89 count++;
90 next_c = c->next;
91 free(c);
92 }
93 return count;
94}
95
96static int xdl_merge_cmp_lines(xdfenv_t *xe1, int i1, xdfenv_t *xe2, int i2,
97 int line_count, long flags)
98{
99 int i;
100 xrecord_t **rec1 = xe1->xdf2.recs + i1;
101 xrecord_t **rec2 = xe2->xdf2.recs + i2;
102
103 for (i = 0; i < line_count; i++) {
104 int result = xdl_recmatch(rec1[i]->ptr, rec1[i]->size,
105 rec2[i]->ptr, rec2[i]->size, flags);
106 if (!result)
107 return -1;
108 }
109 return 0;
110}
111
112static int xdl_recs_copy_0(int use_orig, xdfenv_t *xe, int i, int count, int needs_cr, int add_nl, char *dest)
113{
114 xrecord_t **recs;
115 int size = 0;
116
117 recs = (use_orig ? xe->xdf1.recs : xe->xdf2.recs) + i;
118
119 if (count < 1)
120 return 0;
121
122 for (i = 0; i < count; size += recs[i++]->size)
123 if (dest)
124 memcpy(dest + size, recs[i]->ptr, recs[i]->size);
125 if (add_nl) {
126 i = recs[count - 1]->size;
127 if (i == 0 || recs[count - 1]->ptr[i - 1] != '\n') {
128 if (needs_cr) {
129 if (dest)
130 dest[size] = '\r';
131 size++;
132 }
133
134 if (dest)
135 dest[size] = '\n';
136 size++;
137 }
138 }
139 return size;
140}
141
142static int xdl_recs_copy(xdfenv_t *xe, int i, int count, int needs_cr, int add_nl, char *dest)
143{
144 return xdl_recs_copy_0(0, xe, i, count, needs_cr, add_nl, dest);
145}
146
147static int xdl_orig_copy(xdfenv_t *xe, int i, int count, int needs_cr, int add_nl, char *dest)
148{
149 return xdl_recs_copy_0(1, xe, i, count, needs_cr, add_nl, dest);
150}
151
152/*
153 * Returns 1 if the i'th line ends in CR/LF (if it is the last line and
154 * has no eol, the preceding line, if any), 0 if it ends in LF-only, and
155 * -1 if the line ending cannot be determined.
156 */
157static int is_eol_crlf(xdfile_t *file, int i)
158{
159 long size;
160
161 if (i < file->nrec - 1)
162 /* All lines before the last *must* end in LF */
163 return (size = file->recs[i]->size) > 1 &&
164 file->recs[i]->ptr[size - 2] == '\r';
165 if (!file->nrec)
166 /* Cannot determine eol style from empty file */
167 return -1;
168 if ((size = file->recs[i]->size) &&
169 file->recs[i]->ptr[size - 1] == '\n')
170 /* Last line; ends in LF; Is it CR/LF? */
171 return size > 1 &&
172 file->recs[i]->ptr[size - 2] == '\r';
173 if (!i)
174 /* The only line has no eol */
175 return -1;
176 /* Determine eol from second-to-last line */
177 return (size = file->recs[i - 1]->size) > 1 &&
178 file->recs[i - 1]->ptr[size - 2] == '\r';
179}
180
181static int is_cr_needed(xdfenv_t *xe1, xdfenv_t *xe2, xdmerge_t *m)
182{
183 int needs_cr;
184
185 /* Match post-images' preceding, or first, lines' end-of-line style */
186 needs_cr = is_eol_crlf(&xe1->xdf2, m->i1 ? m->i1 - 1 : 0);
187 if (needs_cr)
188 needs_cr = is_eol_crlf(&xe2->xdf2, m->i2 ? m->i2 - 1 : 0);
189 /* Look at pre-image's first line, unless we already settled on LF */
190 if (needs_cr)
191 needs_cr = is_eol_crlf(&xe1->xdf1, 0);
192 /* If still undecided, use LF-only */
193 return needs_cr < 0 ? 0 : needs_cr;
194}
195
196static int fill_conflict_hunk(xdfenv_t *xe1, const char *name1,
197 xdfenv_t *xe2, const char *name2,
198 const char *name3,
199 int size, int i, int style,
200 xdmerge_t *m, char *dest, int marker_size)
201{
202 int marker1_size = (name1 ? strlen(name1) + 1 : 0);
203 int marker2_size = (name2 ? strlen(name2) + 1 : 0);
204 int marker3_size = (name3 ? strlen(name3) + 1 : 0);
205 int needs_cr = is_cr_needed(xe1, xe2, m);
206
207 if (marker_size <= 0)
208 marker_size = DEFAULT_CONFLICT_MARKER_SIZE;
209
210 /* Before conflicting part */
211 size += xdl_recs_copy(xe1, i, m->i1 - i, 0, 0,
212 dest ? dest + size : NULL);
213
214 if (!dest) {
215 size += marker_size + 1 + needs_cr + marker1_size;
216 } else {
217 memset(dest + size, '<', marker_size);
218 size += marker_size;
219 if (marker1_size) {
220 dest[size] = ' ';
221 memcpy(dest + size + 1, name1, marker1_size - 1);
222 size += marker1_size;
223 }
224 if (needs_cr)
225 dest[size++] = '\r';
226 dest[size++] = '\n';
227 }
228
229 /* Postimage from side #1 */
230 size += xdl_recs_copy(xe1, m->i1, m->chg1, needs_cr, 1,
231 dest ? dest + size : NULL);
232
233 if (style == XDL_MERGE_DIFF3) {
234 /* Shared preimage */
235 if (!dest) {
236 size += marker_size + 1 + needs_cr + marker3_size;
237 } else {
238 memset(dest + size, '|', marker_size);
239 size += marker_size;
240 if (marker3_size) {
241 dest[size] = ' ';
242 memcpy(dest + size + 1, name3, marker3_size - 1);
243 size += marker3_size;
244 }
245 if (needs_cr)
246 dest[size++] = '\r';
247 dest[size++] = '\n';
248 }
249 size += xdl_orig_copy(xe1, m->i0, m->chg0, needs_cr, 1,
250 dest ? dest + size : NULL);
251 }
252
253 if (!dest) {
254 size += marker_size + 1 + needs_cr;
255 } else {
256 memset(dest + size, '=', marker_size);
257 size += marker_size;
258 if (needs_cr)
259 dest[size++] = '\r';
260 dest[size++] = '\n';
261 }
262
263 /* Postimage from side #2 */
264 size += xdl_recs_copy(xe2, m->i2, m->chg2, needs_cr, 1,
265 dest ? dest + size : NULL);
266 if (!dest) {
267 size += marker_size + 1 + needs_cr + marker2_size;
268 } else {
269 memset(dest + size, '>', marker_size);
270 size += marker_size;
271 if (marker2_size) {
272 dest[size] = ' ';
273 memcpy(dest + size + 1, name2, marker2_size - 1);
274 size += marker2_size;
275 }
276 if (needs_cr)
277 dest[size++] = '\r';
278 dest[size++] = '\n';
279 }
280 return size;
281}
282
283static int xdl_fill_merge_buffer(xdfenv_t *xe1, const char *name1,
284 xdfenv_t *xe2, const char *name2,
285 const char *ancestor_name,
286 int favor,
287 xdmerge_t *m, char *dest, int style,
288 int marker_size)
289{
290 int size, i;
291
292 for (size = i = 0; m; m = m->next) {
293 if (favor && !m->mode)
294 m->mode = favor;
295
296 if (m->mode == 0)
297 size = fill_conflict_hunk(xe1, name1, xe2, name2,
298 ancestor_name,
299 size, i, style, m, dest,
300 marker_size);
301 else if (m->mode & 3) {
302 /* Before conflicting part */
303 size += xdl_recs_copy(xe1, i, m->i1 - i, 0, 0,
304 dest ? dest + size : NULL);
305 /* Postimage from side #1 */
306 if (m->mode & 1) {
307 int needs_cr = is_cr_needed(xe1, xe2, m);
308
309 size += xdl_recs_copy(xe1, m->i1, m->chg1, needs_cr, (m->mode & 2),
310 dest ? dest + size : NULL);
311 }
312 /* Postimage from side #2 */
313 if (m->mode & 2)
314 size += xdl_recs_copy(xe2, m->i2, m->chg2, 0, 0,
315 dest ? dest + size : NULL);
316 } else
317 continue;
318 i = m->i1 + m->chg1;
319 }
320 size += xdl_recs_copy(xe1, i, xe1->xdf2.nrec - i, 0, 0,
321 dest ? dest + size : NULL);
322 return size;
323}
324
325/*
326 * Sometimes, changes are not quite identical, but differ in only a few
327 * lines. Try hard to show only these few lines as conflicting.
328 */
329static int xdl_refine_conflicts(xdfenv_t *xe1, xdfenv_t *xe2, xdmerge_t *m,
330 xpparam_t const *xpp)
331{
332 for (; m; m = m->next) {
333 mmfile_t t1, t2;
334 xdfenv_t xe;
335 xdchange_t *xscr, *x;
336 int i1 = m->i1, i2 = m->i2;
337
338 /* let's handle just the conflicts */
339 if (m->mode)
340 continue;
341
342 /* no sense refining a conflict when one side is empty */
343 if (m->chg1 == 0 || m->chg2 == 0)
344 continue;
345
346 /*
347 * This probably does not work outside git, since
348 * we have a very simple mmfile structure.
349 */
350 t1.ptr = (char *)xe1->xdf2.recs[m->i1]->ptr;
351 t1.size = xe1->xdf2.recs[m->i1 + m->chg1 - 1]->ptr
352 + xe1->xdf2.recs[m->i1 + m->chg1 - 1]->size - t1.ptr;
353 t2.ptr = (char *)xe2->xdf2.recs[m->i2]->ptr;
354 t2.size = xe2->xdf2.recs[m->i2 + m->chg2 - 1]->ptr
355 + xe2->xdf2.recs[m->i2 + m->chg2 - 1]->size - t2.ptr;
356 if (xdl_do_diff(&t1, &t2, xpp, &xe) < 0)
357 return -1;
358 if (xdl_change_compact(&xe.xdf1, &xe.xdf2, xpp->flags) < 0 ||
359 xdl_change_compact(&xe.xdf2, &xe.xdf1, xpp->flags) < 0 ||
360 xdl_build_script(&xe, &xscr) < 0) {
361 xdl_free_env(&xe);
362 return -1;
363 }
364 if (!xscr) {
365 /* If this happens, the changes are identical. */
366 xdl_free_env(&xe);
367 m->mode = 4;
368 continue;
369 }
370 x = xscr;
371 m->i1 = xscr->i1 + i1;
372 m->chg1 = xscr->chg1;
373 m->i2 = xscr->i2 + i2;
374 m->chg2 = xscr->chg2;
375 while (xscr->next) {
376 xdmerge_t *m2 = xdl_malloc(sizeof(xdmerge_t));
377 if (!m2) {
378 xdl_free_env(&xe);
379 xdl_free_script(x);
380 return -1;
381 }
382 xscr = xscr->next;
383 m2->next = m->next;
384 m->next = m2;
385 m = m2;
386 m->mode = 0;
387 m->i1 = xscr->i1 + i1;
388 m->chg1 = xscr->chg1;
389 m->i2 = xscr->i2 + i2;
390 m->chg2 = xscr->chg2;
391 }
392 xdl_free_env(&xe);
393 xdl_free_script(x);
394 }
395 return 0;
396}
397
398static int line_contains_alnum(const char *ptr, long size)
399{
400 while (size--)
401 if (isalnum((unsigned char)*(ptr++)))
402 return 1;
403 return 0;
404}
405
406static int lines_contain_alnum(xdfenv_t *xe, int i, int chg)
407{
408 for (; chg; chg--, i++)
409 if (line_contains_alnum(xe->xdf2.recs[i]->ptr,
410 xe->xdf2.recs[i]->size))
411 return 1;
412 return 0;
413}
414
415/*
416 * This function merges m and m->next, marking everything between those hunks
417 * as conflicting, too.
418 */
419static void xdl_merge_two_conflicts(xdmerge_t *m)
420{
421 xdmerge_t *next_m = m->next;
422 m->chg1 = next_m->i1 + next_m->chg1 - m->i1;
423 m->chg2 = next_m->i2 + next_m->chg2 - m->i2;
424 m->next = next_m->next;
425 free(next_m);
426}
427
428/*
429 * If there are less than 3 non-conflicting lines between conflicts,
430 * it appears simpler -- because it takes up less (or as many) lines --
431 * if the lines are moved into the conflicts.
432 */
433static int xdl_simplify_non_conflicts(xdfenv_t *xe1, xdmerge_t *m,
434 int simplify_if_no_alnum)
435{
436 int result = 0;
437
438 if (!m)
439 return result;
440 for (;;) {
441 xdmerge_t *next_m = m->next;
442 int begin, end;
443
444 if (!next_m)
445 return result;
446
447 begin = m->i1 + m->chg1;
448 end = next_m->i1;
449
450 if (m->mode != 0 || next_m->mode != 0 ||
451 (end - begin > 3 &&
452 (!simplify_if_no_alnum ||
453 lines_contain_alnum(xe1, begin, end - begin)))) {
454 m = next_m;
455 } else {
456 result++;
457 xdl_merge_two_conflicts(m);
458 }
459 }
460}
461
462/*
463 * level == 0: mark all overlapping changes as conflict
464 * level == 1: mark overlapping changes as conflict only if not identical
465 * level == 2: analyze non-identical changes for minimal conflict set
466 * level == 3: analyze non-identical changes for minimal conflict set, but
467 * treat hunks not containing any letter or number as conflicting
468 *
469 * returns < 0 on error, == 0 for no conflicts, else number of conflicts
470 */
471static int xdl_do_merge(xdfenv_t *xe1, xdchange_t *xscr1,
472 xdfenv_t *xe2, xdchange_t *xscr2,
473 xmparam_t const *xmp, mmbuffer_t *result)
474{
475 xdmerge_t *changes, *c;
476 xpparam_t const *xpp = &xmp->xpp;
477 const char *const ancestor_name = xmp->ancestor;
478 const char *const name1 = xmp->file1;
479 const char *const name2 = xmp->file2;
480 int i0, i1, i2, chg0, chg1, chg2;
481 int level = xmp->level;
482 int style = xmp->style;
483 int favor = xmp->favor;
484
485 if (style == XDL_MERGE_DIFF3) {
486 /*
487 * "diff3 -m" output does not make sense for anything
488 * more aggressive than XDL_MERGE_EAGER.
489 */
490 if (XDL_MERGE_EAGER < level)
491 level = XDL_MERGE_EAGER;
492 }
493
494 c = changes = NULL;
495
496 while (xscr1 && xscr2) {
497 if (!changes)
498 changes = c;
499 if (xscr1->i1 + xscr1->chg1 < xscr2->i1) {
500 i0 = xscr1->i1;
501 i1 = xscr1->i2;
502 i2 = xscr2->i2 - xscr2->i1 + xscr1->i1;
503 chg0 = xscr1->chg1;
504 chg1 = xscr1->chg2;
505 chg2 = xscr1->chg1;
506 if (xdl_append_merge(&c, 1,
507 i0, chg0, i1, chg1, i2, chg2)) {
508 xdl_cleanup_merge(changes);
509 return -1;
510 }
511 xscr1 = xscr1->next;
512 continue;
513 }
514 if (xscr2->i1 + xscr2->chg1 < xscr1->i1) {
515 i0 = xscr2->i1;
516 i1 = xscr1->i2 - xscr1->i1 + xscr2->i1;
517 i2 = xscr2->i2;
518 chg0 = xscr2->chg1;
519 chg1 = xscr2->chg1;
520 chg2 = xscr2->chg2;
521 if (xdl_append_merge(&c, 2,
522 i0, chg0, i1, chg1, i2, chg2)) {
523 xdl_cleanup_merge(changes);
524 return -1;
525 }
526 xscr2 = xscr2->next;
527 continue;
528 }
529 if (level == XDL_MERGE_MINIMAL || xscr1->i1 != xscr2->i1 ||
530 xscr1->chg1 != xscr2->chg1 ||
531 xscr1->chg2 != xscr2->chg2 ||
532 xdl_merge_cmp_lines(xe1, xscr1->i2,
533 xe2, xscr2->i2,
534 xscr1->chg2, xpp->flags)) {
535 /* conflict */
536 int off = xscr1->i1 - xscr2->i1;
537 int ffo = off + xscr1->chg1 - xscr2->chg1;
538
539 i0 = xscr1->i1;
540 i1 = xscr1->i2;
541 i2 = xscr2->i2;
542 if (off > 0) {
543 i0 -= off;
544 i1 -= off;
545 }
546 else
547 i2 += off;
548 chg0 = xscr1->i1 + xscr1->chg1 - i0;
549 chg1 = xscr1->i2 + xscr1->chg2 - i1;
550 chg2 = xscr2->i2 + xscr2->chg2 - i2;
551 if (ffo < 0) {
552 chg0 -= ffo;
553 chg1 -= ffo;
554 } else
555 chg2 += ffo;
556 if (xdl_append_merge(&c, 0,
557 i0, chg0, i1, chg1, i2, chg2)) {
558 xdl_cleanup_merge(changes);
559 return -1;
560 }
561 }
562
563 i1 = xscr1->i1 + xscr1->chg1;
564 i2 = xscr2->i1 + xscr2->chg1;
565
566 if (i1 >= i2)
567 xscr2 = xscr2->next;
568 if (i2 >= i1)
569 xscr1 = xscr1->next;
570 }
571 while (xscr1) {
572 if (!changes)
573 changes = c;
574 i0 = xscr1->i1;
575 i1 = xscr1->i2;
576 i2 = xscr1->i1 + xe2->xdf2.nrec - xe2->xdf1.nrec;
577 chg0 = xscr1->chg1;
578 chg1 = xscr1->chg2;
579 chg2 = xscr1->chg1;
580 if (xdl_append_merge(&c, 1,
581 i0, chg0, i1, chg1, i2, chg2)) {
582 xdl_cleanup_merge(changes);
583 return -1;
584 }
585 xscr1 = xscr1->next;
586 }
587 while (xscr2) {
588 if (!changes)
589 changes = c;
590 i0 = xscr2->i1;
591 i1 = xscr2->i1 + xe1->xdf2.nrec - xe1->xdf1.nrec;
592 i2 = xscr2->i2;
593 chg0 = xscr2->chg1;
594 chg1 = xscr2->chg1;
595 chg2 = xscr2->chg2;
596 if (xdl_append_merge(&c, 2,
597 i0, chg0, i1, chg1, i2, chg2)) {
598 xdl_cleanup_merge(changes);
599 return -1;
600 }
601 xscr2 = xscr2->next;
602 }
603 if (!changes)
604 changes = c;
605 /* refine conflicts */
606 if (XDL_MERGE_ZEALOUS <= level &&
607 (xdl_refine_conflicts(xe1, xe2, changes, xpp) < 0 ||
608 xdl_simplify_non_conflicts(xe1, changes,
609 XDL_MERGE_ZEALOUS < level) < 0)) {
610 xdl_cleanup_merge(changes);
611 return -1;
612 }
613 /* output */
614 if (result) {
615 int marker_size = xmp->marker_size;
616 int size = xdl_fill_merge_buffer(xe1, name1, xe2, name2,
617 ancestor_name,
618 favor, changes, NULL, style,
619 marker_size);
620 result->ptr = xdl_malloc(size);
621 if (!result->ptr) {
622 xdl_cleanup_merge(changes);
623 return -1;
624 }
625 result->size = size;
626 xdl_fill_merge_buffer(xe1, name1, xe2, name2,
627 ancestor_name, favor, changes,
628 result->ptr, style, marker_size);
629 }
630 return xdl_cleanup_merge(changes);
631}
632
633int xdl_merge(mmfile_t *orig, mmfile_t *mf1, mmfile_t *mf2,
634 xmparam_t const *xmp, mmbuffer_t *result)
635{
636 xdchange_t *xscr1, *xscr2;
637 xdfenv_t xe1, xe2;
638 int status;
639 xpparam_t const *xpp = &xmp->xpp;
640
641 result->ptr = NULL;
642 result->size = 0;
643
644 if (xdl_do_diff(orig, mf1, xpp, &xe1) < 0) {
645 return -1;
646 }
647 if (xdl_do_diff(orig, mf2, xpp, &xe2) < 0) {
648 xdl_free_env(&xe1);
649 return -1;
650 }
651 if (xdl_change_compact(&xe1.xdf1, &xe1.xdf2, xpp->flags) < 0 ||
652 xdl_change_compact(&xe1.xdf2, &xe1.xdf1, xpp->flags) < 0 ||
653 xdl_build_script(&xe1, &xscr1) < 0) {
654 xdl_free_env(&xe1);
655 return -1;
656 }
657 if (xdl_change_compact(&xe2.xdf1, &xe2.xdf2, xpp->flags) < 0 ||
658 xdl_change_compact(&xe2.xdf2, &xe2.xdf1, xpp->flags) < 0 ||
659 xdl_build_script(&xe2, &xscr2) < 0) {
660 xdl_free_script(xscr1);
661 xdl_free_env(&xe1);
662 xdl_free_env(&xe2);
663 return -1;
664 }
665 status = 0;
666 if (!xscr1) {
667 result->ptr = xdl_malloc(mf2->size);
668 memcpy(result->ptr, mf2->ptr, mf2->size);
669 result->size = mf2->size;
670 } else if (!xscr2) {
671 result->ptr = xdl_malloc(mf1->size);
672 memcpy(result->ptr, mf1->ptr, mf1->size);
673 result->size = mf1->size;
674 } else {
675 status = xdl_do_merge(&xe1, xscr1,
676 &xe2, xscr2,
677 xmp, result);
678 }
679 xdl_free_script(xscr1);
680 xdl_free_script(xscr2);
681
682 xdl_free_env(&xe1);
683 xdl_free_env(&xe2);
684
685 return status;
686}