/* xdiff/xemit.c on commit "directory rename detection: testcases exploring possibly suboptimal merges" (362ab31) */
   1/*
   2 *  LibXDiff by Davide Libenzi ( File Differential Library )
   3 *  Copyright (C) 2003  Davide Libenzi
   4 *
   5 *  This library is free software; you can redistribute it and/or
   6 *  modify it under the terms of the GNU Lesser General Public
   7 *  License as published by the Free Software Foundation; either
   8 *  version 2.1 of the License, or (at your option) any later version.
   9 *
  10 *  This library is distributed in the hope that it will be useful,
  11 *  but WITHOUT ANY WARRANTY; without even the implied warranty of
  12 *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  13 *  Lesser General Public License for more details.
  14 *
  15 *  You should have received a copy of the GNU Lesser General Public
  16 *  License along with this library; if not, see
  17 *  <http://www.gnu.org/licenses/>.
  18 *
  19 *  Davide Libenzi <davidel@xmailserver.org>
  20 *
  21 */
  22
  23#include "xinclude.h"
  24
  25static long xdl_get_rec(xdfile_t *xdf, long ri, char const **rec) {
  26
  27        *rec = xdf->recs[ri]->ptr;
  28
  29        return xdf->recs[ri]->size;
  30}
  31
  32
  33static int xdl_emit_record(xdfile_t *xdf, long ri, char const *pre, xdemitcb_t *ecb) {
  34        long size, psize = strlen(pre);
  35        char const *rec;
  36
  37        size = xdl_get_rec(xdf, ri, &rec);
  38        if (xdl_emit_diffrec(rec, size, pre, psize, ecb) < 0) {
  39
  40                return -1;
  41        }
  42
  43        return 0;
  44}
  45
  46
  47/*
  48 * Starting at the passed change atom, find the latest change atom to be included
  49 * inside the differential hunk according to the specified configuration.
  50 * Also advance xscr if the first changes must be discarded.
  51 */
  52xdchange_t *xdl_get_hunk(xdchange_t **xscr, xdemitconf_t const *xecfg)
  53{
  54        xdchange_t *xch, *xchp, *lxch;
  55        long max_common = 2 * xecfg->ctxlen + xecfg->interhunkctxlen;
  56        long max_ignorable = xecfg->ctxlen;
  57        unsigned long ignored = 0; /* number of ignored blank lines */
  58
  59        /* remove ignorable changes that are too far before other changes */
  60        for (xchp = *xscr; xchp && xchp->ignore; xchp = xchp->next) {
  61                xch = xchp->next;
  62
  63                if (xch == NULL ||
  64                    xch->i1 - (xchp->i1 + xchp->chg1) >= max_ignorable)
  65                        *xscr = xch;
  66        }
  67
  68        if (*xscr == NULL)
  69                return NULL;
  70
  71        lxch = *xscr;
  72
  73        for (xchp = *xscr, xch = xchp->next; xch; xchp = xch, xch = xch->next) {
  74                long distance = xch->i1 - (xchp->i1 + xchp->chg1);
  75                if (distance > max_common)
  76                        break;
  77
  78                if (distance < max_ignorable && (!xch->ignore || lxch == xchp)) {
  79                        lxch = xch;
  80                        ignored = 0;
  81                } else if (distance < max_ignorable && xch->ignore) {
  82                        ignored += xch->chg2;
  83                } else if (lxch != xchp &&
  84                           xch->i1 + ignored - (lxch->i1 + lxch->chg1) > max_common) {
  85                        break;
  86                } else if (!xch->ignore) {
  87                        lxch = xch;
  88                        ignored = 0;
  89                } else {
  90                        ignored += xch->chg2;
  91                }
  92        }
  93
  94        return lxch;
  95}
  96
  97
  98static long def_ff(const char *rec, long len, char *buf, long sz, void *priv)
  99{
 100        if (len > 0 &&
 101                        (isalpha((unsigned char)*rec) || /* identifier? */
 102                         *rec == '_' || /* also identifier? */
 103                         *rec == '$')) { /* identifiers from VMS and other esoterico */
 104                if (len > sz)
 105                        len = sz;
 106                while (0 < len && isspace((unsigned char)rec[len - 1]))
 107                        len--;
 108                memcpy(buf, rec, len);
 109                return len;
 110        }
 111        return -1;
 112}
 113
 114static long match_func_rec(xdfile_t *xdf, xdemitconf_t const *xecfg, long ri,
 115                           char *buf, long sz)
 116{
 117        const char *rec;
 118        long len = xdl_get_rec(xdf, ri, &rec);
 119        if (!xecfg->find_func)
 120                return def_ff(rec, len, buf, sz, xecfg->find_func_priv);
 121        return xecfg->find_func(rec, len, buf, sz, xecfg->find_func_priv);
 122}
 123
 124static int is_func_rec(xdfile_t *xdf, xdemitconf_t const *xecfg, long ri)
 125{
 126        char dummy[1];
 127        return match_func_rec(xdf, xecfg, ri, dummy, sizeof(dummy)) >= 0;
 128}
 129
/*
 * Text of a function line, as filled in through get_func_line().
 */
struct func_line {
        /* length returned by the matcher for the text stored in buf */
        long len;
        /*
         * Matched text.  The default matcher def_ff() copies at most
         * sizeof(buf) bytes and does not append a terminating NUL.
         */
        char buf[80];
};
 134
 135static long get_func_line(xdfenv_t *xe, xdemitconf_t const *xecfg,
 136                          struct func_line *func_line, long start, long limit)
 137{
 138        long l, size, step = (start > limit) ? -1 : 1;
 139        char *buf, dummy[1];
 140
 141        buf = func_line ? func_line->buf : dummy;
 142        size = func_line ? sizeof(func_line->buf) : sizeof(dummy);
 143
 144        for (l = start; l != limit && 0 <= l && l < xe->xdf1.nrec; l += step) {
 145                long len = match_func_rec(&xe->xdf1, xecfg, l, buf, size);
 146                if (len >= 0) {
 147                        if (func_line)
 148                                func_line->len = len;
 149                        return l;
 150                }
 151        }
 152        return -1;
 153}
 154
 155static int is_empty_rec(xdfile_t *xdf, long ri)
 156{
 157        const char *rec;
 158        long len = xdl_get_rec(xdf, ri, &rec);
 159
 160        while (len > 0 && XDL_ISSPACE(*rec)) {
 161                rec++;
 162                len--;
 163        }
 164        return !len;
 165}
 166
/*
 * Walk the change list xscr and emit it, hunk by hunk, through ecb.
 *
 * For every group of changes selected by xdl_get_hunk() this computes the
 * start (s1, s2) and end (e1, e2) of the hunk in the pre-image (xdf1) and
 * post-image (xdf2) including context, emits a hunk header with
 * xdl_emit_hunk_hdr(), and then emits context lines (" "), removed lines
 * ("-") and added lines ("+") with xdl_emit_record().
 *
 * xecfg->flags may contain:
 *   XDL_EMIT_FUNCCONTEXT - widen each hunk to the surrounding function, as
 *                          located by get_func_line()/is_func_rec().
 *   XDL_EMIT_FUNCNAMES   - look up a function line to pass to the hunk
 *                          header.
 *
 * Returns 0 on success, or -1 as soon as one of the emit helpers fails.
 */
int xdl_emit_diff(xdfenv_t *xe, xdchange_t *xscr, xdemitcb_t *ecb,
                  xdemitconf_t const *xecfg) {
        long s1, s2, e1, e2, lctx;
        xdchange_t *xch, *xche;
        /* lower bound (exclusive) for the next function-name lookup */
        long funclineprev = -1;
        /*
         * Persists across hunks; get_func_line() only updates .len when it
         * finds a match, so otherwise the previous value is passed again.
         */
        struct func_line func_line = { 0 };

        for (xch = xscr; xch; xch = xche->next) {
                /* xch may be advanced past leading ignorable changes */
                xche = xdl_get_hunk(&xch, xecfg);
                if (!xch)
                        break;

                /* hunk start: ctxlen lines before the change, clamped to 0 */
                s1 = XDL_MAX(xch->i1 - xecfg->ctxlen, 0);
                s2 = XDL_MAX(xch->i2 - xecfg->ctxlen, 0);

                if (xecfg->flags & XDL_EMIT_FUNCCONTEXT) {
                        long fs1, i1 = xch->i1;

                        /* Appended chunk? */
                        if (i1 >= xe->xdf1.nrec) {
                                long i2 = xch->i2;

                                /*
                                 * We don't need additional context if
                                 * a whole function was added.
                                 */
                                while (i2 < xe->xdf2.nrec) {
                                        if (is_func_rec(&xe->xdf2, xecfg, i2))
                                                goto post_context_calculation;
                                        i2++;
                                }

                                /*
                                 * Otherwise get more context from the
                                 * pre-image.
                                 */
                                i1 = xe->xdf1.nrec - 1;
                        }

                        /* search backwards from i1 for a function line */
                        fs1 = get_func_line(xe, xecfg, NULL, i1, -1);
                        /*
                         * Also include the non-empty, non-function lines
                         * directly above it (presumably leading comments).
                         */
                        while (fs1 > 0 && !is_empty_rec(&xe->xdf1, fs1 - 1) &&
                               !is_func_rec(&xe->xdf1, xecfg, fs1 - 1))
                                fs1--;
                        /* no function line found: start at the first record */
                        if (fs1 < 0)
                                fs1 = 0;
                        /* move the start back by the same amount in both files */
                        if (fs1 < s1) {
                                s2 -= s1 - fs1;
                                s1 = fs1;
                        }
                }

 post_context_calculation:
                /*
                 * Trailing context: ctxlen lines, clamped so that it does
                 * not run past the end of either file.
                 */
                lctx = xecfg->ctxlen;
                lctx = XDL_MIN(lctx, xe->xdf1.nrec - (xche->i1 + xche->chg1));
                lctx = XDL_MIN(lctx, xe->xdf2.nrec - (xche->i2 + xche->chg2));

                e1 = xche->i1 + xche->chg1 + lctx;
                e2 = xche->i2 + xche->chg2 + lctx;

                if (xecfg->flags & XDL_EMIT_FUNCCONTEXT) {
                        /* search forwards for the next function line */
                        long fe1 = get_func_line(xe, xecfg, NULL,
                                                 xche->i1 + xche->chg1,
                                                 xe->xdf1.nrec);
                        /* leave out the empty lines just before it */
                        while (fe1 > 0 && is_empty_rec(&xe->xdf1, fe1 - 1))
                                fe1--;
                        /* no later function line: extend to the end of xdf1 */
                        if (fe1 < 0)
                                fe1 = xe->xdf1.nrec;
                        /* move the end forward by the same amount in both files */
                        if (fe1 > e1) {
                                e2 += fe1 - e1;
                                e1 = fe1;
                        }

                        /*
                         * Overlap with next change?  Then include it
                         * in the current hunk and start over to find
                         * its new end.
                         */
                        if (xche->next) {
                                long l = XDL_MIN(xche->next->i1,
                                                 xe->xdf1.nrec - 1);
                                /*
                                 * Merge when the next change's leading
                                 * context reaches e1, or when no function
                                 * line lies between e1 and that change.
                                 */
                                if (l - xecfg->ctxlen <= e1 ||
                                    get_func_line(xe, xecfg, NULL, l, e1) < 0) {
                                        xche = xche->next;
                                        goto post_context_calculation;
                                }
                        }
                }

                /*
                 * Emit current hunk header.
                 */

                if (xecfg->flags & XDL_EMIT_FUNCNAMES) {
                        /*
                         * Search backwards from the line above the hunk,
                         * stopping at where the previous hunk's search began.
                         */
                        get_func_line(xe, xecfg, &func_line,
                                      s1 - 1, funclineprev);
                        funclineprev = s1 - 1;
                }
                /* s1/s2 are 0-based indices; the header gets them + 1 */
                if (xdl_emit_hunk_hdr(s1 + 1, e1 - s1, s2 + 1, e2 - s2,
                                      func_line.buf, func_line.len, ecb) < 0)
                        return -1;

                /*
                 * Emit pre-context.
                 */
                for (; s2 < xch->i2; s2++)
                        if (xdl_emit_record(&xe->xdf2, s2, " ", ecb) < 0)
                                return -1;

                /* walk every change atom of the hunk, from xch up to xche */
                for (s1 = xch->i1, s2 = xch->i2;; xch = xch->next) {
                        /*
                         * Merge previous with current change atom.
                         */
                        for (; s1 < xch->i1 && s2 < xch->i2; s1++, s2++)
                                if (xdl_emit_record(&xe->xdf2, s2, " ", ecb) < 0)
                                        return -1;

                        /*
                         * Removes lines from the first file.
                         */
                        for (s1 = xch->i1; s1 < xch->i1 + xch->chg1; s1++)
                                if (xdl_emit_record(&xe->xdf1, s1, "-", ecb) < 0)
                                        return -1;

                        /*
                         * Adds lines from the second file.
                         */
                        for (s2 = xch->i2; s2 < xch->i2 + xch->chg2; s2++)
                                if (xdl_emit_record(&xe->xdf2, s2, "+", ecb) < 0)
                                        return -1;

                        if (xch == xche)
                                break;
                        /* continue right after the change just emitted */
                        s1 = xch->i1 + xch->chg1;
                        s2 = xch->i2 + xch->chg2;
                }

                /*
                 * Emit post-context.
                 */
                for (s2 = xche->i2 + xche->chg2; s2 < e2; s2++)
                        if (xdl_emit_record(&xe->xdf2, s2, " ", ecb) < 0)
                                return -1;
        }

        return 0;
}