From: Junio C Hamano Date: Wed, 10 Feb 2016 22:20:07 +0000 (-0800) Subject: Merge branch 'ls/clean-smudge-override-in-config' X-Git-Tag: v2.8.0-rc0~61 X-Git-Url: https://git.lorimer.id.au/gitweb.git/diff_plain/a3764e7da7ae2c30e2e1bbeaa0961a17bced2078?ds=sidebyside;hp=-c Merge branch 'ls/clean-smudge-override-in-config' Clean/smudge filters defined in a configuration file of lower precedence can now be overridden to be a pass-through no-op by setting the variable to an empty string. * ls/clean-smudge-override-in-config: convert: treat an empty string for clean/smudge filters as "cat" --- a3764e7da7ae2c30e2e1bbeaa0961a17bced2078 diff --combined convert.c index 4bb4ec1d83,02d5f1e23c..8cd6222a26 --- a/convert.c +++ b/convert.c @@@ -13,11 -13,6 +13,11 @@@ * translation when the "text" attribute or "auto_crlf" option is set. */ +/* Stat bits: When BIN is set, the txt bits are unset */ +#define CONVERT_STAT_BITS_TXT_LF 0x1 +#define CONVERT_STAT_BITS_TXT_CRLF 0x2 +#define CONVERT_STAT_BITS_BIN 0x4 + enum crlf_action { CRLF_GUESS = -1, CRLF_BINARY = 0, @@@ -80,75 -75,26 +80,75 @@@ static void gather_stats(const char *bu /* * The same heuristics as diff.c::mmfile_is_binary() + * We treat files with bare CR as binary */ -static int is_binary(unsigned long size, struct text_stat *stats) +static int convert_is_binary(unsigned long size, const struct text_stat *stats) { - + if (stats->cr != stats->crlf) + return 1; if (stats->nul) return 1; if ((stats->printable >> 7) < stats->nonprintable) return 1; - /* - * Other heuristics? Average line length might be relevant, - * as might LF vs CR vs CRLF counts.. - * - * NOTE! It might be normal to have a low ratio of CRLF to LF - * (somebody starts with a LF-only file and edits it with an editor - * that adds CRLF only to lines that are added..). But do we - * want to support CR-only? Probably not. - */ return 0; } +static unsigned int gather_convert_stats(const char *data, unsigned long size) +{ + struct text_stat stats; + if (!data || !size) + return 0; + gather_stats(data, size, &stats); + if (convert_is_binary(size, &stats)) + return CONVERT_STAT_BITS_BIN; + else if (stats.crlf && stats.crlf == stats.lf) + return CONVERT_STAT_BITS_TXT_CRLF; + else if (stats.crlf && stats.lf) + return CONVERT_STAT_BITS_TXT_CRLF | CONVERT_STAT_BITS_TXT_LF; + else if (stats.lf) + return CONVERT_STAT_BITS_TXT_LF; + else + return 0; +} + +static const char *gather_convert_stats_ascii(const char *data, unsigned long size) +{ + unsigned int convert_stats = gather_convert_stats(data, size); + + if (convert_stats & CONVERT_STAT_BITS_BIN) + return "-text"; + switch (convert_stats) { + case CONVERT_STAT_BITS_TXT_LF: + return "lf"; + case CONVERT_STAT_BITS_TXT_CRLF: + return "crlf"; + case CONVERT_STAT_BITS_TXT_LF | CONVERT_STAT_BITS_TXT_CRLF: + return "mixed"; + default: + return "none"; + } +} + +const char *get_cached_convert_stats_ascii(const char *path) +{ + const char *ret; + unsigned long sz; + void *data = read_blob_data_from_cache(path, &sz); + ret = gather_convert_stats_ascii(data, sz); + free(data); + return ret; +} + +const char *get_wt_convert_stats_ascii(const char *path) +{ + const char *ret = ""; + struct strbuf sb = STRBUF_INIT; + if (strbuf_read_file(&sb, path, 0) >= 0) + ret = gather_convert_stats_ascii(sb.buf, sb.len); + strbuf_release(&sb); + return ret; +} + static enum eol output_eol(enum crlf_action crlf_action) { switch (crlf_action) { @@@ -241,7 -187,18 +241,7 @@@ static int crlf_to_git(const char *path gather_stats(src, len, &stats); if (crlf_action == CRLF_AUTO || crlf_action == CRLF_GUESS) { - /* - * We're currently not going to even try to convert stuff - * that has bare CR characters. Does anybody do that crazy - * stuff? - */ - if (stats.cr != stats.crlf) - return 0; - - /* - * And add some heuristics for binary vs text, of course... - */ - if (is_binary(len, &stats)) + if (convert_is_binary(len, &stats)) return 0; if (crlf_action == CRLF_GUESS) { @@@ -320,7 -277,11 +320,7 @@@ static int crlf_to_worktree(const char return 0; } - /* If we have any bare CR characters, we're not going to touch it */ - if (stats.cr != stats.crlf) - return 0; - - if (is_binary(len, &stats)) + if (convert_is_binary(len, &stats)) return 0; } @@@ -434,7 -395,7 +434,7 @@@ static int apply_filter(const char *pat struct async async; struct filter_params params; - if (!cmd) + if (!cmd || !*cmd) return 0; if (!dst) @@@ -816,30 -777,6 +816,30 @@@ int would_convert_to_git_filter_fd(cons return apply_filter(path, NULL, 0, -1, NULL, ca.drv->clean); } +const char *get_convert_attr_ascii(const char *path) +{ + struct conv_attrs ca; + enum crlf_action crlf_action; + + convert_attrs(&ca, path); + crlf_action = input_crlf_action(ca.crlf_action, ca.eol_attr); + switch (crlf_action) { + case CRLF_GUESS: + return ""; + case CRLF_BINARY: + return "-text"; + case CRLF_TEXT: + return "text"; + case CRLF_INPUT: + return "text eol=lf"; + case CRLF_CRLF: + return "text=auto eol=crlf"; + case CRLF_AUTO: + return "text=auto"; + } + return ""; +} + int convert_to_git(const char *path, const char *src, size_t len, struct strbuf *dst, enum safe_crlf checksafe) {