1#include"cache.h" 2/* 3 * convert.c - convert a file when checking it out and checking it in. 4 * 5 * This should use the pathname to decide on whether it wants to do some 6 * more interesting conversions (automatic gzip/unzip, general format 7 * conversions etc etc), but by default it just does automatic CRLF<->LF 8 * translation when the "auto_crlf" option is set. 9 */ 10 11struct text_stat { 12/* CR, LF and CRLF counts */ 13unsigned cr, lf, crlf; 14 15/* These are just approximations! */ 16unsigned printable, nonprintable; 17}; 18 19static voidgather_stats(const char*buf,unsigned long size,struct text_stat *stats) 20{ 21unsigned long i; 22 23memset(stats,0,sizeof(*stats)); 24 25for(i =0; i < size; i++) { 26unsigned char c = buf[i]; 27if(c =='\r') { 28 stats->cr++; 29if(i+1< size && buf[i+1] =='\n') 30 stats->crlf++; 31continue; 32} 33if(c =='\n') { 34 stats->lf++; 35continue; 36} 37if(c ==127) 38/* DEL */ 39 stats->nonprintable++; 40else if(c <32) { 41switch(c) { 42/* BS, HT, ESC and FF */ 43case'\b':case'\t':case'\033':case'\014': 44 stats->printable++; 45break; 46default: 47 stats->nonprintable++; 48} 49} 50else 51 stats->printable++; 52} 53} 54 55/* 56 * The same heuristics as diff.c::mmfile_is_binary() 57 */ 58static intis_binary(unsigned long size,struct text_stat *stats) 59{ 60 61if((stats->printable >>7) < stats->nonprintable) 62return1; 63/* 64 * Other heuristics? Average line length might be relevant, 65 * as might LF vs CR vs CRLF counts.. 66 * 67 * NOTE! It might be normal to have a low ratio of CRLF to LF 68 * (somebody starts with a LF-only file and edits it with an editor 69 * that adds CRLF only to lines that are added..). But do we 70 * want to support CR-only? Probably not. 71 */ 72return0; 73} 74 75intconvert_to_git(const char*path,char**bufp,unsigned long*sizep) 76{ 77char*buffer, *nbuf; 78unsigned long size, nsize; 79struct text_stat stats; 80 81/* 82 * FIXME! Other pluggable conversions should go here, 83 * based on filename patterns. Right now we just do the 84 * stupid auto-CRLF one. 85 */ 86if(!auto_crlf) 87return0; 88 89 size = *sizep; 90if(!size) 91return0; 92 buffer = *bufp; 93 94gather_stats(buffer, size, &stats); 95 96/* No CR? Nothing to convert, regardless. */ 97if(!stats.cr) 98return0; 99 100/* 101 * We're currently not going to even try to convert stuff 102 * that has bare CR characters. Does anybody do that crazy 103 * stuff? 104 */ 105if(stats.cr != stats.crlf) 106return0; 107 108/* 109 * And add some heuristics for binary vs text, of course... 110 */ 111if(is_binary(size, &stats)) 112return0; 113 114/* 115 * Ok, allocate a new buffer, fill it in, and return true 116 * to let the caller know that we switched buffers on it. 117 */ 118 nsize = size - stats.crlf; 119 nbuf =xmalloc(nsize); 120*bufp = nbuf; 121*sizep = nsize; 122do{ 123unsigned char c = *buffer++; 124if(c !='\r') 125*nbuf++ = c; 126}while(--size); 127 128return1; 129} 130 131intconvert_to_working_tree(const char*path,char**bufp,unsigned long*sizep) 132{ 133char*buffer, *nbuf; 134unsigned long size, nsize; 135struct text_stat stats; 136unsigned char last; 137 138/* 139 * FIXME! Other pluggable conversions should go here, 140 * based on filename patterns. Right now we just do the 141 * stupid auto-CRLF one. 142 */ 143if(auto_crlf <=0) 144return0; 145 146 size = *sizep; 147if(!size) 148return0; 149 buffer = *bufp; 150 151gather_stats(buffer, size, &stats); 152 153/* No LF? Nothing to convert, regardless. */ 154if(!stats.lf) 155return0; 156 157/* Was it already in CRLF format? */ 158if(stats.lf == stats.crlf) 159return0; 160 161/* If we have any bare CR characters, we're not going to touch it */ 162if(stats.cr != stats.crlf) 163return0; 164 165if(is_binary(size, &stats)) 166return0; 167 168/* 169 * Ok, allocate a new buffer, fill it in, and return true 170 * to let the caller know that we switched buffers on it. 171 */ 172 nsize = size + stats.lf - stats.crlf; 173 nbuf =xmalloc(nsize); 174*bufp = nbuf; 175*sizep = nsize; 176 last =0; 177do{ 178unsigned char c = *buffer++; 179if(c =='\n'&& last !='\r') 180*nbuf++ ='\r'; 181*nbuf++ = c; 182 last = c; 183}while(--size); 184 185return1; 186}