diffcore-delta.con commit Merge branch 'jc/diff' (0825f96)
   1#include "cache.h"
   2#include "diff.h"
   3#include "diffcore.h"
   4
   5struct linehash {
   6        unsigned long bytes;
   7        unsigned long hash;
   8};
   9
  10static unsigned long hash_extended_line(const unsigned char **buf_p,
  11                                        unsigned long left)
  12{
  13        /* An extended line is zero or more whitespace letters (including LF)
  14         * followed by one non whitespace letter followed by zero or more
  15         * non LF, and terminated with by a LF (or EOF).
  16         */
  17        const unsigned char *bol = *buf_p;
  18        const unsigned char *buf = bol;
  19        unsigned long hashval = 0;
  20        while (left) {
  21                unsigned c = *buf++;
  22                if (!c)
  23                        goto binary;
  24                left--;
  25                if (' ' < c) {
  26                        hashval = c;
  27                        break;
  28                }
  29        }
  30        while (left) {
  31                unsigned c = *buf++;
  32                if (!c)
  33                        goto binary;
  34                left--;
  35                if (c == '\n')
  36                        break;
  37                if (' ' < c)
  38                        hashval = hashval * 11 + c;
  39        }
  40        *buf_p = buf;
  41        return hashval;
  42
  43 binary:
  44        *buf_p = NULL;
  45        return 0;
  46}
  47
  48static int linehash_compare(const void *a_, const void *b_)
  49{
  50        struct linehash *a = (struct linehash *) a_;
  51        struct linehash *b = (struct linehash *) b_;
  52        if (a->hash < b->hash) return -1;
  53        if (a->hash > b->hash) return 1;
  54        return 0;
  55}
  56
  57static struct linehash *hash_lines(const unsigned char *buf,
  58                                   unsigned long size)
  59{
  60        const unsigned char *eobuf = buf + size;
  61        struct linehash *line = NULL;
  62        int alloc = 0, used = 0;
  63
  64        while (buf < eobuf) {
  65                const unsigned char *ptr = buf;
  66                unsigned long hash = hash_extended_line(&buf, eobuf-ptr);
  67                if (!buf) {
  68                        free(line);
  69                        return NULL;
  70                }
  71                if (alloc <= used) {
  72                        alloc = alloc_nr(alloc);
  73                        line = xrealloc(line, sizeof(*line) * alloc);
  74                }
  75                line[used].bytes = buf - ptr;
  76                line[used].hash = hash;
  77                used++;
  78        }
  79        qsort(line, used, sizeof(*line), linehash_compare);
  80
  81        /* Terminate the list */
  82        if (alloc <= used)
  83                line = xrealloc(line, sizeof(*line) * (used+1));
  84        line[used].bytes = line[used].hash = 0;
  85        return line;
  86}
  87
  88int diffcore_count_changes(void *src, unsigned long src_size,
  89                           void *dst, unsigned long dst_size,
  90                           unsigned long delta_limit,
  91                           unsigned long *src_copied,
  92                           unsigned long *literal_added)
  93{
  94        struct linehash *src_lines, *dst_lines;
  95        unsigned long sc, la;
  96
  97        src_lines = hash_lines(src, src_size);
  98        if (!src_lines)
  99                return -1;
 100        dst_lines = hash_lines(dst, dst_size);
 101        if (!dst_lines) {
 102                free(src_lines);
 103                return -1;
 104        }
 105        sc = la = 0;
 106        while (src_lines->bytes && dst_lines->bytes) {
 107                int cmp = linehash_compare(src_lines, dst_lines);
 108                if (!cmp) {
 109                        sc += src_lines->bytes;
 110                        src_lines++;
 111                        dst_lines++;
 112                        continue;
 113                }
 114                if (cmp < 0) {
 115                        src_lines++;
 116                        continue;
 117                }
 118                la += dst_lines->bytes;
 119                dst_lines++;
 120        }
 121        while (dst_lines->bytes) {
 122                la += dst_lines->bytes;
 123                dst_lines++;
 124        }
 125        *src_copied = sc;
 126        *literal_added = la;
 127        return 0;
 128}