1#include "cache.h" 2#include "attr.h" 3 4/* 5 * convert.c - convert a file when checking it out and checking it in. 6 * 7 * This should use the pathname to decide on whether it wants to do some 8 * more interesting conversions (automatic gzip/unzip, general format 9 * conversions etc etc), but by default it just does automatic CRLF<->LF 10 * translation when the "auto_crlf" option is set. 11 */ 12 13struct text_stat { 14 /* CR, LF and CRLF counts */ 15 unsigned cr, lf, crlf; 16 17 /* These are just approximations! */ 18 unsigned printable, nonprintable; 19}; 20 21static void gather_stats(const char *buf, unsigned long size, struct text_stat *stats) 22{ 23 unsigned long i; 24 25 memset(stats, 0, sizeof(*stats)); 26 27 for (i = 0; i < size; i++) { 28 unsigned char c = buf[i]; 29 if (c == '\r') { 30 stats->cr++; 31 if (i+1 < size && buf[i+1] == '\n') 32 stats->crlf++; 33 continue; 34 } 35 if (c == '\n') { 36 stats->lf++; 37 continue; 38 } 39 if (c == 127) 40 /* DEL */ 41 stats->nonprintable++; 42 else if (c < 32) { 43 switch (c) { 44 /* BS, HT, ESC and FF */ 45 case '\b': case '\t': case '\033': case '\014': 46 stats->printable++; 47 break; 48 default: 49 stats->nonprintable++; 50 } 51 } 52 else 53 stats->printable++; 54 } 55} 56 57/* 58 * The same heuristics as diff.c::mmfile_is_binary() 59 */ 60static int is_binary(unsigned long size, struct text_stat *stats) 61{ 62 63 if ((stats->printable >> 7) < stats->nonprintable) 64 return 1; 65 /* 66 * Other heuristics? Average line length might be relevant, 67 * as might LF vs CR vs CRLF counts.. 68 * 69 * NOTE! It might be normal to have a low ratio of CRLF to LF 70 * (somebody starts with a LF-only file and edits it with an editor 71 * that adds CRLF only to lines that are added..). But do we 72 * want to support CR-only? Probably not. 73 */ 74 return 0; 75} 76 77static int autocrlf_to_git(const char *path, char **bufp, unsigned long *sizep) 78{ 79 char *buffer, *nbuf; 80 unsigned long size, nsize; 81 struct text_stat stats; 82 83 if (!auto_crlf) 84 return 0; 85 86 size = *sizep; 87 if (!size) 88 return 0; 89 buffer = *bufp; 90 91 gather_stats(buffer, size, &stats); 92 93 /* No CR? Nothing to convert, regardless. */ 94 if (!stats.cr) 95 return 0; 96 97 /* 98 * We're currently not going to even try to convert stuff 99 * that has bare CR characters. Does anybody do that crazy 100 * stuff? 101 */ 102 if (stats.cr != stats.crlf) 103 return 0; 104 105 /* 106 * And add some heuristics for binary vs text, of course... 107 */ 108 if (is_binary(size, &stats)) 109 return 0; 110 111 /* 112 * Ok, allocate a new buffer, fill it in, and return true 113 * to let the caller know that we switched buffers on it. 114 */ 115 nsize = size - stats.crlf; 116 nbuf = xmalloc(nsize); 117 *bufp = nbuf; 118 *sizep = nsize; 119 do { 120 unsigned char c = *buffer++; 121 if (c != '\r') 122 *nbuf++ = c; 123 } while (--size); 124 125 return 1; 126} 127 128static int autocrlf_to_working_tree(const char *path, char **bufp, unsigned long *sizep) 129{ 130 char *buffer, *nbuf; 131 unsigned long size, nsize; 132 struct text_stat stats; 133 unsigned char last; 134 135 /* 136 * FIXME! Other pluggable conversions should go here, 137 * based on filename patterns. Right now we just do the 138 * stupid auto-CRLF one. 139 */ 140 if (auto_crlf <= 0) 141 return 0; 142 143 size = *sizep; 144 if (!size) 145 return 0; 146 buffer = *bufp; 147 148 gather_stats(buffer, size, &stats); 149 150 /* No LF? Nothing to convert, regardless. */ 151 if (!stats.lf) 152 return 0; 153 154 /* Was it already in CRLF format? */ 155 if (stats.lf == stats.crlf) 156 return 0; 157 158 /* If we have any bare CR characters, we're not going to touch it */ 159 if (stats.cr != stats.crlf) 160 return 0; 161 162 if (is_binary(size, &stats)) 163 return 0; 164 165 /* 166 * Ok, allocate a new buffer, fill it in, and return true 167 * to let the caller know that we switched buffers on it. 168 */ 169 nsize = size + stats.lf - stats.crlf; 170 nbuf = xmalloc(nsize); 171 *bufp = nbuf; 172 *sizep = nsize; 173 last = 0; 174 do { 175 unsigned char c = *buffer++; 176 if (c == '\n' && last != '\r') 177 *nbuf++ = '\r'; 178 *nbuf++ = c; 179 last = c; 180 } while (--size); 181 182 return 1; 183} 184 185static void setup_crlf_check(struct git_attr_check *check) 186{ 187 static struct git_attr *attr_crlf; 188 189 if (!attr_crlf) 190 attr_crlf = git_attr("crlf", 4); 191 check->attr = attr_crlf; 192} 193 194static int git_path_is_binary(const char *path) 195{ 196 struct git_attr_check attr_crlf_check; 197 198 setup_crlf_check(&attr_crlf_check); 199 200 /* 201 * If crlf is not mentioned, default to autocrlf; 202 * disable autocrlf only when crlf attribute is explicitly 203 * unset. 204 */ 205 return (!git_checkattr(path, 1, &attr_crlf_check) && 206 (0 == attr_crlf_check.isset)); 207} 208 209int convert_to_git(const char *path, char **bufp, unsigned long *sizep) 210{ 211 if (git_path_is_binary(path)) 212 return 0; 213 return autocrlf_to_git(path, bufp, sizep); 214} 215 216int convert_to_working_tree(const char *path, char **bufp, unsigned long *sizep) 217{ 218 if (git_path_is_binary(path)) 219 return 0; 220 return autocrlf_to_working_tree(path, bufp, sizep); 221}