1#include "cache.h" 2/* 3 * convert.c - convert a file when checking it out and checking it in. 4 * 5 * This should use the pathname to decide on whether it wants to do some 6 * more interesting conversions (automatic gzip/unzip, general format 7 * conversions etc etc), but by default it just does automatic CRLF<->LF 8 * translation when the "auto_crlf" option is set. 9 */ 10 11struct text_stat { 12 /* CR, LF and CRLF counts */ 13 unsigned cr, lf, crlf; 14 15 /* These are just approximations! */ 16 unsigned printable, nonprintable; 17}; 18 19static void gather_stats(const char *buf, unsigned long size, struct text_stat *stats) 20{ 21 unsigned long i; 22 23 memset(stats, 0, sizeof(*stats)); 24 25 for (i = 0; i < size; i++) { 26 unsigned char c = buf[i]; 27 if (c == '\r') { 28 stats->cr++; 29 if (i+1 < size && buf[i+1] == '\n') 30 stats->crlf++; 31 continue; 32 } 33 if (c == '\n') { 34 stats->lf++; 35 continue; 36 } 37 if (c == 127) 38 /* DEL */ 39 stats->nonprintable++; 40 else if (c < 32) { 41 switch (c) { 42 /* BS, HT, ESC and FF */ 43 case '\b': case '\t': case '\033': case '\014': 44 stats->printable++; 45 break; 46 default: 47 stats->nonprintable++; 48 } 49 } 50 else 51 stats->printable++; 52 } 53} 54 55/* 56 * The same heuristics as diff.c::mmfile_is_binary() 57 */ 58static int is_binary(unsigned long size, struct text_stat *stats) 59{ 60 61 if ((stats->printable >> 7) < stats->nonprintable) 62 return 1; 63 /* 64 * Other heuristics? Average line length might be relevant, 65 * as might LF vs CR vs CRLF counts.. 66 * 67 * NOTE! It might be normal to have a low ratio of CRLF to LF 68 * (somebody starts with a LF-only file and edits it with an editor 69 * that adds CRLF only to lines that are added..). But do we 70 * want to support CR-only? Probably not. 71 */ 72 return 0; 73} 74 75int convert_to_git(const char *path, char **bufp, unsigned long *sizep) 76{ 77 char *buffer, *nbuf; 78 unsigned long size, nsize; 79 struct text_stat stats; 80 81 /* 82 * FIXME! Other pluggable conversions should go here, 83 * based on filename patterns. Right now we just do the 84 * stupid auto-CRLF one. 85 */ 86 if (!auto_crlf) 87 return 0; 88 89 size = *sizep; 90 if (!size) 91 return 0; 92 buffer = *bufp; 93 94 gather_stats(buffer, size, &stats); 95 96 /* No CR? Nothing to convert, regardless. */ 97 if (!stats.cr) 98 return 0; 99 100 /* 101 * We're currently not going to even try to convert stuff 102 * that has bare CR characters. Does anybody do that crazy 103 * stuff? 104 */ 105 if (stats.cr != stats.crlf) 106 return 0; 107 108 /* 109 * And add some heuristics for binary vs text, of course... 110 */ 111 if (is_binary(size, &stats)) 112 return 0; 113 114 /* 115 * Ok, allocate a new buffer, fill it in, and return true 116 * to let the caller know that we switched buffers on it. 117 */ 118 nsize = size - stats.crlf; 119 nbuf = xmalloc(nsize); 120 *bufp = nbuf; 121 *sizep = nsize; 122 do { 123 unsigned char c = *buffer++; 124 if (c != '\r') 125 *nbuf++ = c; 126 } while (--size); 127 128 return 1; 129} 130 131int convert_to_working_tree(const char *path, char **bufp, unsigned long *sizep) 132{ 133 char *buffer, *nbuf; 134 unsigned long size, nsize; 135 struct text_stat stats; 136 unsigned char last; 137 138 /* 139 * FIXME! Other pluggable conversions should go here, 140 * based on filename patterns. Right now we just do the 141 * stupid auto-CRLF one. 142 */ 143 if (!auto_crlf) 144 return 0; 145 146 size = *sizep; 147 if (!size) 148 return 0; 149 buffer = *bufp; 150 151 gather_stats(buffer, size, &stats); 152 153 /* No LF? Nothing to convert, regardless. */ 154 if (!stats.lf) 155 return 0; 156 157 /* Was it already in CRLF format? */ 158 if (stats.lf == stats.crlf) 159 return 0; 160 161 /* If we have any bare CR characters, we're not going to touch it */ 162 if (stats.cr != stats.crlf) 163 return 0; 164 165 if (is_binary(size, &stats)) 166 return 0; 167 168 /* 169 * Ok, allocate a new buffer, fill it in, and return true 170 * to let the caller know that we switched buffers on it. 171 */ 172 nsize = size + stats.lf - stats.crlf; 173 nbuf = xmalloc(nsize); 174 *bufp = nbuf; 175 *sizep = nsize; 176 last = 0; 177 do { 178 unsigned char c = *buffer++; 179 if (c == '\n' && last != '\r') 180 *nbuf++ = '\r'; 181 *nbuf++ = c; 182 last = c; 183 } while (--size); 184 185 return 1; 186}