convert.c on commit Custom low-level merge driver support. (f3ef6b6)
   1#include "cache.h"
   2#include "attr.h"
   3
   4/*
   5 * convert.c - convert a file when checking it out and checking it in.
   6 *
   7 * This should use the pathname to decide on whether it wants to do some
   8 * more interesting conversions (automatic gzip/unzip, general format
   9 * conversions etc etc), but by default it just does automatic CRLF<->LF
  10 * translation when the "auto_crlf" option is set.
  11 */
  12
  13struct text_stat {
  14        /* CR, LF and CRLF counts */
  15        unsigned cr, lf, crlf;
  16
  17        /* These are just approximations! */
  18        unsigned printable, nonprintable;
  19};
  20
  21static void gather_stats(const char *buf, unsigned long size, struct text_stat *stats)
  22{
  23        unsigned long i;
  24
  25        memset(stats, 0, sizeof(*stats));
  26
  27        for (i = 0; i < size; i++) {
  28                unsigned char c = buf[i];
  29                if (c == '\r') {
  30                        stats->cr++;
  31                        if (i+1 < size && buf[i+1] == '\n')
  32                                stats->crlf++;
  33                        continue;
  34                }
  35                if (c == '\n') {
  36                        stats->lf++;
  37                        continue;
  38                }
  39                if (c == 127)
  40                        /* DEL */
  41                        stats->nonprintable++;
  42                else if (c < 32) {
  43                        switch (c) {
  44                                /* BS, HT, ESC and FF */
  45                        case '\b': case '\t': case '\033': case '\014':
  46                                stats->printable++;
  47                                break;
  48                        default:
  49                                stats->nonprintable++;
  50                        }
  51                }
  52                else
  53                        stats->printable++;
  54        }
  55}
  56
  57/*
  58 * The same heuristics as diff.c::mmfile_is_binary()
  59 */
  60static int is_binary(unsigned long size, struct text_stat *stats)
  61{
  62
  63        if ((stats->printable >> 7) < stats->nonprintable)
  64                return 1;
  65        /*
  66         * Other heuristics? Average line length might be relevant,
  67         * as might LF vs CR vs CRLF counts..
  68         *
  69         * NOTE! It might be normal to have a low ratio of CRLF to LF
  70         * (somebody starts with a LF-only file and edits it with an editor
  71         * that adds CRLF only to lines that are added..). But do  we
  72         * want to support CR-only? Probably not.
  73         */
  74        return 0;
  75}
  76
  77static int crlf_to_git(const char *path, char **bufp, unsigned long *sizep, int guess)
  78{
  79        char *buffer, *nbuf;
  80        unsigned long size, nsize;
  81        struct text_stat stats;
  82
  83        if (guess && !auto_crlf)
  84                return 0;
  85
  86        size = *sizep;
  87        if (!size)
  88                return 0;
  89        buffer = *bufp;
  90
  91        gather_stats(buffer, size, &stats);
  92
  93        /* No CR? Nothing to convert, regardless. */
  94        if (!stats.cr)
  95                return 0;
  96
  97        if (guess) {
  98                /*
  99                 * We're currently not going to even try to convert stuff
 100                 * that has bare CR characters. Does anybody do that crazy
 101                 * stuff?
 102                 */
 103                if (stats.cr != stats.crlf)
 104                        return 0;
 105
 106                /*
 107                 * And add some heuristics for binary vs text, of course...
 108                 */
 109                if (is_binary(size, &stats))
 110                        return 0;
 111        }
 112
 113        /*
 114         * Ok, allocate a new buffer, fill it in, and return true
 115         * to let the caller know that we switched buffers on it.
 116         */
 117        nsize = size - stats.crlf;
 118        nbuf = xmalloc(nsize);
 119        *bufp = nbuf;
 120        *sizep = nsize;
 121
 122        if (guess) {
 123                do {
 124                        unsigned char c = *buffer++;
 125                        if (c != '\r')
 126                                *nbuf++ = c;
 127                } while (--size);
 128        } else {
 129                do {
 130                        unsigned char c = *buffer++;
 131                        if (! (c == '\r' && (1 < size && *buffer == '\n')))
 132                                *nbuf++ = c;
 133                } while (--size);
 134        }
 135
 136        return 1;
 137}
 138
 139static int autocrlf_to_git(const char *path, char **bufp, unsigned long *sizep)
 140{
 141        return crlf_to_git(path, bufp, sizep, 1);
 142}
 143
 144static int forcecrlf_to_git(const char *path, char **bufp, unsigned long *sizep)
 145{
 146        return crlf_to_git(path, bufp, sizep, 0);
 147}
 148
 149static int crlf_to_working_tree(const char *path, char **bufp, unsigned long *sizep, int guess)
 150{
 151        char *buffer, *nbuf;
 152        unsigned long size, nsize;
 153        struct text_stat stats;
 154        unsigned char last;
 155
 156        if (guess && auto_crlf <= 0)
 157                return 0;
 158
 159        size = *sizep;
 160        if (!size)
 161                return 0;
 162        buffer = *bufp;
 163
 164        gather_stats(buffer, size, &stats);
 165
 166        /* No LF? Nothing to convert, regardless. */
 167        if (!stats.lf)
 168                return 0;
 169
 170        /* Was it already in CRLF format? */
 171        if (stats.lf == stats.crlf)
 172                return 0;
 173
 174        if (guess) {
 175                /* If we have any bare CR characters, we're not going to touch it */
 176                if (stats.cr != stats.crlf)
 177                        return 0;
 178
 179                if (is_binary(size, &stats))
 180                        return 0;
 181        }
 182
 183        /*
 184         * Ok, allocate a new buffer, fill it in, and return true
 185         * to let the caller know that we switched buffers on it.
 186         */
 187        nsize = size + stats.lf - stats.crlf;
 188        nbuf = xmalloc(nsize);
 189        *bufp = nbuf;
 190        *sizep = nsize;
 191        last = 0;
 192        do {
 193                unsigned char c = *buffer++;
 194                if (c == '\n' && last != '\r')
 195                        *nbuf++ = '\r';
 196                *nbuf++ = c;
 197                last = c;
 198        } while (--size);
 199
 200        return 1;
 201}
 202
 203static int autocrlf_to_working_tree(const char *path, char **bufp, unsigned long *sizep)
 204{
 205        return crlf_to_working_tree(path, bufp, sizep, 1);
 206}
 207
 208static int forcecrlf_to_working_tree(const char *path, char **bufp, unsigned long *sizep)
 209{
 210        return crlf_to_working_tree(path, bufp, sizep, 0);
 211}
 212
 213static void setup_crlf_check(struct git_attr_check *check)
 214{
 215        static struct git_attr *attr_crlf;
 216
 217        if (!attr_crlf)
 218                attr_crlf = git_attr("crlf", 4);
 219        check->attr = attr_crlf;
 220}
 221
 222static int git_path_check_crlf(const char *path)
 223{
 224        struct git_attr_check attr_crlf_check;
 225
 226        setup_crlf_check(&attr_crlf_check);
 227
 228        if (!git_checkattr(path, 1, &attr_crlf_check)) {
 229                void *value = attr_crlf_check.value;
 230                if (ATTR_TRUE(value))
 231                        return 1;
 232                else if (ATTR_FALSE(value))
 233                        return 0;
 234                else if (ATTR_UNSET(value))
 235                        ;
 236                else
 237                        die("unknown value %s given to 'crlf' attribute",
 238                            (char *)value);
 239        }
 240        return -1;
 241}
 242
 243int convert_to_git(const char *path, char **bufp, unsigned long *sizep)
 244{
 245        switch (git_path_check_crlf(path)) {
 246        case 0:
 247                return 0;
 248        case 1:
 249                return forcecrlf_to_git(path, bufp, sizep);
 250        default:
 251                return autocrlf_to_git(path, bufp, sizep);
 252        }
 253}
 254
 255int convert_to_working_tree(const char *path, char **bufp, unsigned long *sizep)
 256{
 257        switch (git_path_check_crlf(path)) {
 258        case 0:
 259                return 0;
 260        case 1:
 261                return forcecrlf_to_working_tree(path, bufp, sizep);
 262        default:
 263                return autocrlf_to_working_tree(path, bufp, sizep);
 264        }
 265}