20c744aa23652b8b93ea96137a668592c6e28c2d
   1#include "cache.h"
   2#include "attr.h"
   3
/*
 * convert.c - convert a file when checking it out and checking it in.
 *
 * This should use the pathname to decide on whether it wants to do some
 * more interesting conversions (automatic gzip/unzip, general format
 * conversions, etc.), but by default it just does automatic CRLF<->LF
 * translation when the "auto_crlf" option is set.
 */
  12
  13struct text_stat {
  14        /* CR, LF and CRLF counts */
  15        unsigned cr, lf, crlf;
  16
  17        /* These are just approximations! */
  18        unsigned printable, nonprintable;
  19};
  20
  21static void gather_stats(const char *buf, unsigned long size, struct text_stat *stats)
  22{
  23        unsigned long i;
  24
  25        memset(stats, 0, sizeof(*stats));
  26
  27        for (i = 0; i < size; i++) {
  28                unsigned char c = buf[i];
  29                if (c == '\r') {
  30                        stats->cr++;
  31                        if (i+1 < size && buf[i+1] == '\n')
  32                                stats->crlf++;
  33                        continue;
  34                }
  35                if (c == '\n') {
  36                        stats->lf++;
  37                        continue;
  38                }
  39                if (c == 127)
  40                        /* DEL */
  41                        stats->nonprintable++;
  42                else if (c < 32) {
  43                        switch (c) {
  44                                /* BS, HT, ESC and FF */
  45                        case '\b': case '\t': case '\033': case '\014':
  46                                stats->printable++;
  47                                break;
  48                        default:
  49                                stats->nonprintable++;
  50                        }
  51                }
  52                else
  53                        stats->printable++;
  54        }
  55}
  56
  57/*
  58 * The same heuristics as diff.c::mmfile_is_binary()
  59 */
  60static int is_binary(unsigned long size, struct text_stat *stats)
  61{
  62
  63        if ((stats->printable >> 7) < stats->nonprintable)
  64                return 1;
  65        /*
  66         * Other heuristics? Average line length might be relevant,
  67         * as might LF vs CR vs CRLF counts..
  68         *
  69         * NOTE! It might be normal to have a low ratio of CRLF to LF
  70         * (somebody starts with a LF-only file and edits it with an editor
  71         * that adds CRLF only to lines that are added..). But do  we
  72         * want to support CR-only? Probably not.
  73         */
  74        return 0;
  75}
  76
  77static int autocrlf_to_git(const char *path, char **bufp, unsigned long *sizep)
  78{
  79        char *buffer, *nbuf;
  80        unsigned long size, nsize;
  81        struct text_stat stats;
  82
  83        if (!auto_crlf)
  84                return 0;
  85
  86        size = *sizep;
  87        if (!size)
  88                return 0;
  89        buffer = *bufp;
  90
  91        gather_stats(buffer, size, &stats);
  92
  93        /* No CR? Nothing to convert, regardless. */
  94        if (!stats.cr)
  95                return 0;
  96
  97        /*
  98         * We're currently not going to even try to convert stuff
  99         * that has bare CR characters. Does anybody do that crazy
 100         * stuff?
 101         */
 102        if (stats.cr != stats.crlf)
 103                return 0;
 104
 105        /*
 106         * And add some heuristics for binary vs text, of course...
 107         */
 108        if (is_binary(size, &stats))
 109                return 0;
 110
 111        /*
 112         * Ok, allocate a new buffer, fill it in, and return true
 113         * to let the caller know that we switched buffers on it.
 114         */
 115        nsize = size - stats.crlf;
 116        nbuf = xmalloc(nsize);
 117        *bufp = nbuf;
 118        *sizep = nsize;
 119        do {
 120                unsigned char c = *buffer++;
 121                if (c != '\r')
 122                        *nbuf++ = c;
 123        } while (--size);
 124
 125        return 1;
 126}
 127
 128static int autocrlf_to_working_tree(const char *path, char **bufp, unsigned long *sizep)
 129{
 130        char *buffer, *nbuf;
 131        unsigned long size, nsize;
 132        struct text_stat stats;
 133        unsigned char last;
 134
 135        /*
 136         * FIXME! Other pluggable conversions should go here,
 137         * based on filename patterns. Right now we just do the
 138         * stupid auto-CRLF one.
 139         */
 140        if (auto_crlf <= 0)
 141                return 0;
 142
 143        size = *sizep;
 144        if (!size)
 145                return 0;
 146        buffer = *bufp;
 147
 148        gather_stats(buffer, size, &stats);
 149
 150        /* No LF? Nothing to convert, regardless. */
 151        if (!stats.lf)
 152                return 0;
 153
 154        /* Was it already in CRLF format? */
 155        if (stats.lf == stats.crlf)
 156                return 0;
 157
 158        /* If we have any bare CR characters, we're not going to touch it */
 159        if (stats.cr != stats.crlf)
 160                return 0;
 161
 162        if (is_binary(size, &stats))
 163                return 0;
 164
 165        /*
 166         * Ok, allocate a new buffer, fill it in, and return true
 167         * to let the caller know that we switched buffers on it.
 168         */
 169        nsize = size + stats.lf - stats.crlf;
 170        nbuf = xmalloc(nsize);
 171        *bufp = nbuf;
 172        *sizep = nsize;
 173        last = 0;
 174        do {
 175                unsigned char c = *buffer++;
 176                if (c == '\n' && last != '\r')
 177                        *nbuf++ = '\r';
 178                *nbuf++ = c;
 179                last = c;
 180        } while (--size);
 181
 182        return 1;
 183}
 184
 185static void setup_crlf_check(struct git_attr_check *check)
 186{
 187        static struct git_attr *attr_crlf;
 188
 189        if (!attr_crlf)
 190                attr_crlf = git_attr("crlf", 4);
 191        check->attr = attr_crlf;
 192}
 193
 194static int git_path_is_binary(const char *path)
 195{
 196        struct git_attr_check attr_crlf_check;
 197
 198        setup_crlf_check(&attr_crlf_check);
 199
 200        /*
 201         * If crlf is not mentioned, default to autocrlf;
 202         * disable autocrlf only when crlf attribute is explicitly
 203         * unset.
 204         */
 205        return (!git_checkattr(path, 1, &attr_crlf_check) &&
 206                (0 == attr_crlf_check.isset));
 207}
 208
 209int convert_to_git(const char *path, char **bufp, unsigned long *sizep)
 210{
 211        if (git_path_is_binary(path))
 212                return 0;
 213        return autocrlf_to_git(path, bufp, sizep);
 214}
 215
 216int convert_to_working_tree(const char *path, char **bufp, unsigned long *sizep)
 217{
 218        if (git_path_is_binary(path))
 219                return 0;
 220        return autocrlf_to_working_tree(path, bufp, sizep);
 221}