Merge branch 'tb/ls-files-eol'
authorJunio C Hamano <gitster@pobox.com>
Wed, 3 Feb 2016 22:15:59 +0000 (14:15 -0800)
committerJunio C Hamano <gitster@pobox.com>
Wed, 3 Feb 2016 22:15:59 +0000 (14:15 -0800)
"git ls-files" learned a new "--eol" option to help diagnose
end-of-line problems.

* tb/ls-files-eol:
ls-files: add eol diagnostics

Documentation/git-ls-files.txt
builtin/ls-files.c
convert.c
convert.h
t/t0027-auto-crlf.sh
index e26f01fb1d1aec3526879059fe6ee20438965f68..0e08f567a163571c7d8f2a4c10984691f5aba566 100644 (file)
@@ -12,6 +12,7 @@ SYNOPSIS
 'git ls-files' [-z] [-t] [-v]
                (--[cached|deleted|others|ignored|stage|unmerged|killed|modified])*
                (-[c|d|o|i|s|u|k|m])*
+               [--eol]
                [-x <pattern>|--exclude=<pattern>]
                [-X <file>|--exclude-from=<file>]
                [--exclude-per-directory=<file>]
@@ -147,6 +148,24 @@ a space) at the start of each line:
        possible for manual inspection; the exact format may change at
        any time.
 
+--eol::
+       Show <eolinfo> and <eolattr> of files.
+       <eolinfo> is the file content identification used by Git when
+       the "text" attribute is "auto" (or not set and core.autocrlf is not false).
+       <eolinfo> is either "-text", "none", "lf", "crlf", "mixed" or "".
++
+"" means the file is not a regular file, it is not in the index or
+not accessible in the working tree.
++
+<eolattr> is the attribute that is used when checking out or committing,
+it is either "", "-text", "text", "text=auto", "text eol=lf", "text eol=crlf".
+Note: Currently Git does not support "text=auto eol=lf" or "text=auto eol=crlf",
+that may change in the future.
++
+Both the <eolinfo> in the index ("i/<eolinfo>")
+and in the working tree ("w/<eolinfo>") are shown for regular files,
+followed by the <eolattr> ("attr/<eolattr>").
+
 \--::
        Do not interpret any more arguments as options.
 
@@ -161,6 +180,9 @@ which case it outputs:
 
         [<tag> ]<mode> <object> <stage> <file>
 
+'git ls-files --eol' will show
+       i/<eolinfo><SPACES>w/<eolinfo><SPACES>attr/<eolattr><SPACE*><TAB><file>
+
 'git ls-files --unmerged' and 'git ls-files --stage' can be used to examine
 detailed information on unmerged paths.
 
index b6a7cb0c7c48293c6348806cb342a9289f146740..dedf02dc702d1cc94399f45869e5d01d2f0eecb9 100644 (file)
@@ -27,6 +27,7 @@ static int show_killed;
 static int show_valid_bit;
 static int line_terminator = '\n';
 static int debug_mode;
+static int show_eol;
 
 static const char *prefix;
 static int max_prefix_len;
@@ -47,6 +48,23 @@ static const char *tag_modified = "";
 static const char *tag_skip_worktree = "";
 static const char *tag_resolve_undo = "";
 
+/*
+ * When --eol was given, print the "i/<eolinfo> w/<eolinfo> attr/<eolattr>"
+ * columns followed by a TAB for one path; otherwise print nothing.
+ * ce may be NULL for a path that has no index entry, in which case the
+ * index column stays empty.
+ */
+static void write_eolinfo(const struct cache_entry *ce, const char *path)
+{
+       struct stat st;
+       const char *index_info = "";
+       const char *worktree_info = "";
+       const char *attr_info;
+
+       if (!show_eol)
+               return;
+
+       attr_info = get_convert_attr_ascii(path);
+       /* Content is only inspected for regular files. */
+       if (ce && S_ISREG(ce->ce_mode))
+               index_info = get_cached_convert_stats_ascii(ce->name);
+       if (!lstat(path, &st) && S_ISREG(st.st_mode))
+               worktree_info = get_wt_convert_stats_ascii(path);
+       printf("i/%-5s w/%-5s attr/%-17s\t", index_info, worktree_info, attr_info);
+}
+
 static void write_name(const char *name)
 {
        /*
@@ -68,6 +86,7 @@ static void show_dir_entry(const char *tag, struct dir_entry *ent)
                return;
 
        fputs(tag, stdout);
+       write_eolinfo(NULL, ent->name);
        write_name(ent->name);
 }
 
@@ -170,6 +189,7 @@ static void show_ce_entry(const char *tag, const struct cache_entry *ce)
                       find_unique_abbrev(ce->sha1,abbrev),
                       ce_stage(ce));
        }
+       write_eolinfo(ce, ce->name);
        write_name(ce->name);
        if (debug_mode) {
                const struct stat_data *sd = &ce->ce_stat_data;
@@ -433,6 +453,7 @@ int cmd_ls_files(int argc, const char **argv, const char *cmd_prefix)
                OPT_BIT(0, "directory", &dir.flags,
                        N_("show 'other' directories' names only"),
                        DIR_SHOW_OTHER_DIRECTORIES),
+               OPT_BOOL(0, "eol", &show_eol, N_("show line endings of files")),
                OPT_NEGBIT(0, "empty-directory", &dir.flags,
                        N_("don't show empty directories"),
                        DIR_HIDE_EMPTY_DIRECTORIES),
index 814e814438b7c0f4f84850787670766fb4765f08..4bb4ec1d836e695a86f309b276fd07e300885c57 100644 (file)
--- a/convert.c
+++ b/convert.c
  * translation when the "text" attribute or "auto_crlf" option is set.
  */
 
+/* Stat bits: When BIN is set, the txt bits are unset */
+#define CONVERT_STAT_BITS_TXT_LF    0x1
+#define CONVERT_STAT_BITS_TXT_CRLF  0x2
+#define CONVERT_STAT_BITS_BIN       0x4
+
 enum crlf_action {
        CRLF_GUESS = -1,
        CRLF_BINARY = 0,
@@ -75,26 +80,75 @@ static void gather_stats(const char *buf, unsigned long size, struct text_stat *
 
 /*
  * The same heuristics as diff.c::mmfile_is_binary()
+ * We treat files with bare CR as binary
  */
-static int is_binary(unsigned long size, struct text_stat *stats)
+static int convert_is_binary(unsigned long size, const struct text_stat *stats)
 {
-
+       if (stats->cr != stats->crlf)
+               return 1;
        if (stats->nul)
                return 1;
        if ((stats->printable >> 7) < stats->nonprintable)
                return 1;
-       /*
-        * Other heuristics? Average line length might be relevant,
-        * as might LF vs CR vs CRLF counts..
-        *
-        * NOTE! It might be normal to have a low ratio of CRLF to LF
-        * (somebody starts with a LF-only file and edits it with an editor
-        * that adds CRLF only to lines that are added..). But do  we
-        * want to support CR-only? Probably not.
-        */
        return 0;
 }
 
+/*
+ * Classify a buffer and return CONVERT_STAT_BITS_* flags.
+ * Returns 0 for a NULL or empty buffer, or when the gathered stats
+ * report no LF at all.
+ */
+static unsigned int gather_convert_stats(const char *data, unsigned long size)
+{
+       struct text_stat stats;
+
+       if (!data || !size)
+               return 0;
+
+       gather_stats(data, size, &stats);
+       if (convert_is_binary(size, &stats))
+               return CONVERT_STAT_BITS_BIN;
+       if (!stats.lf)
+               return 0;
+       if (!stats.crlf)
+               return CONVERT_STAT_BITS_TXT_LF;
+       /* Equal counts: report CRLF only; otherwise both kinds are present. */
+       if (stats.crlf == stats.lf)
+               return CONVERT_STAT_BITS_TXT_CRLF;
+       return CONVERT_STAT_BITS_TXT_CRLF | CONVERT_STAT_BITS_TXT_LF;
+}
+
+/*
+ * Translate the flags from gather_convert_stats() into the <eolinfo>
+ * string: "-text", "lf", "crlf", "mixed" or "none".
+ * The returned pointer is a string literal and must not be freed.
+ */
+static const char *gather_convert_stats_ascii(const char *data, unsigned long size)
+{
+       const unsigned int lf_and_crlf =
+               CONVERT_STAT_BITS_TXT_LF | CONVERT_STAT_BITS_TXT_CRLF;
+       unsigned int bits = gather_convert_stats(data, size);
+
+       if (bits & CONVERT_STAT_BITS_BIN)
+               return "-text";
+       if (bits == lf_and_crlf)
+               return "mixed";
+       if (bits == CONVERT_STAT_BITS_TXT_CRLF)
+               return "crlf";
+       if (bits == CONVERT_STAT_BITS_TXT_LF)
+               return "lf";
+       return "none";
+}
+
+/*
+ * Return the <eolinfo> string for the blob data that
+ * read_blob_data_from_cache() yields for path.
+ * The result is a string literal, so it stays valid after the blob
+ * buffer is freed here.
+ */
+const char *get_cached_convert_stats_ascii(const char *path)
+{
+       unsigned long size;
+       void *blob = read_blob_data_from_cache(path, &size);
+       const char *eolinfo = gather_convert_stats_ascii(blob, size);
+
+       free(blob);
+       return eolinfo;
+}
+
+/*
+ * Return the <eolinfo> string for the file at path as read from disk,
+ * or "" when the file cannot be read.
+ */
+const char *get_wt_convert_stats_ascii(const char *path)
+{
+       struct strbuf content = STRBUF_INIT;
+       const char *eolinfo;
+
+       if (strbuf_read_file(&content, path, 0) < 0)
+               eolinfo = "";
+       else
+               eolinfo = gather_convert_stats_ascii(content.buf, content.len);
+       strbuf_release(&content);
+       return eolinfo;
+}
+
 static enum eol output_eol(enum crlf_action crlf_action)
 {
        switch (crlf_action) {
@@ -187,18 +241,7 @@ static int crlf_to_git(const char *path, const char *src, size_t len,
        gather_stats(src, len, &stats);
 
        if (crlf_action == CRLF_AUTO || crlf_action == CRLF_GUESS) {
-               /*
-                * We're currently not going to even try to convert stuff
-                * that has bare CR characters. Does anybody do that crazy
-                * stuff?
-                */
-               if (stats.cr != stats.crlf)
-                       return 0;
-
-               /*
-                * And add some heuristics for binary vs text, of course...
-                */
-               if (is_binary(len, &stats))
+               if (convert_is_binary(len, &stats))
                        return 0;
 
                if (crlf_action == CRLF_GUESS) {
@@ -277,11 +320,7 @@ static int crlf_to_worktree(const char *path, const char *src, size_t len,
                                return 0;
                }
 
-               /* If we have any bare CR characters, we're not going to touch it */
-               if (stats.cr != stats.crlf)
-                       return 0;
-
-               if (is_binary(len, &stats))
+               if (convert_is_binary(len, &stats))
                        return 0;
        }
 
@@ -777,6 +816,30 @@ int would_convert_to_git_filter_fd(const char *path)
        return apply_filter(path, NULL, 0, -1, NULL, ca.drv->clean);
 }
 
+/*
+ * Return a string literal describing the text/eol attribute in effect
+ * for path, as printed in the "attr/<eolattr>" column of
+ * "git ls-files --eol": one of "", "-text", "text", "text=auto",
+ * "text eol=lf" or "text eol=crlf".
+ */
+const char *get_convert_attr_ascii(const char *path)
+{
+       struct conv_attrs ca;
+       enum crlf_action crlf_action;
+
+       convert_attrs(&ca, path);
+       crlf_action = input_crlf_action(ca.crlf_action, ca.eol_attr);
+       switch (crlf_action) {
+       case CRLF_GUESS:
+               return "";
+       case CRLF_BINARY:
+               return "-text";
+       case CRLF_TEXT:
+               return "text";
+       case CRLF_INPUT:
+               return "text eol=lf";
+       case CRLF_CRLF:
+               /*
+                * Mirror CRLF_INPUT: "text=auto eol=crlf" is not a
+                * supported combination and must not be reported.
+                */
+               return "text eol=crlf";
+       case CRLF_AUTO:
+               return "text=auto";
+       }
+       return "";
+}
+
 int convert_to_git(const char *path, const char *src, size_t len,
                    struct strbuf *dst, enum safe_crlf checksafe)
 {
index d9d853cd3d2f6d94965a2c2fc8e587386402a83b..ccf436bfbf2a89ceb7003ea6dbebf6ae2b6f4a8c 100644 (file)
--- a/convert.h
+++ b/convert.h
@@ -32,6 +32,9 @@ enum eol {
 };
 
 extern enum eol core_eol;
+extern const char *get_cached_convert_stats_ascii(const char *path);
+extern const char *get_wt_convert_stats_ascii(const char *path);
+extern const char *get_convert_attr_ascii(const char *path);
 
 /* returns 1 if *dst was used */
 extern int convert_to_git(const char *path, const char *src, size_t len,
index b3436515044e9d9be04b8164b704106835c31db1..504e5a02a1d930238023bc2fc7a582183f237674 100755 (executable)
@@ -56,21 +56,16 @@ create_gitattributes () {
 }
 
 create_NNO_files () {
-       lfname=$1
-       crlfname=$2
-       lfmixcrlf=$3
-       lfmixcr=$4
-       crlfnul=$5
        for crlf in false true input
        do
                for attr in "" auto text -text lf crlf
                do
                        pfx=NNO_${crlf}_attr_${attr} &&
-                       cp $lfname    ${pfx}_LF.txt &&
-                       cp $crlfname  ${pfx}_CRLF.txt &&
-                       cp $lfmixcrlf ${pfx}_CRLF_mix_LF.txt &&
-                       cp $lfmixcr   ${pfx}_LF_mix_CR.txt &&
-                       cp $crlfnul   ${pfx}_CRLF_nul.txt
+                       cp CRLF_mix_LF ${pfx}_LF.txt &&
+                       cp CRLF_mix_LF ${pfx}_CRLF.txt &&
+                       cp CRLF_mix_LF ${pfx}_CRLF_mix_LF.txt &&
+                       cp CRLF_mix_LF ${pfx}_LF_mix_CR.txt &&
+                       cp CRLF_mix_LF ${pfx}_CRLF_nul.txt
                done
        done
 }
@@ -96,7 +91,7 @@ commit_check_warn () {
        crlfnul=$7
        pfx=crlf_${crlf}_attr_${attr}
        create_gitattributes "$attr" &&
-       for f in LF CRLF repoMIX LF_mix_CR CRLF_mix_LF LF_nul CRLF_nul
+       for f in LF CRLF LF_mix_CR CRLF_mix_LF LF_nul CRLF_nul
        do
                fname=${pfx}_$f.txt &&
                cp $f $fname &&
@@ -149,6 +144,27 @@ commit_chk_wrnNNO () {
        '
 }
 
+# Map a test fixture name ($1) to the <eolinfo> string that
+# "git ls-files --eol" is expected to print for that content.
+stats_ascii () {
+       case "$1" in
+       LF)          echo lf ;;
+       CRLF)        echo crlf ;;
+       CRLF_mix_LF) echo mixed ;;
+       LF_mix_CR|CRLF_nul|LF_nul|CRLF_mix_CR)
+               echo "-text" ;;
+       *)
+               # Unknown fixture name: emit a marker that makes test_cmp fail
+               echo error_invalid $1 ;;
+       esac
+}
+
 check_files_in_repo () {
        crlf=$1
        attr=$2
@@ -203,35 +219,83 @@ checkout_files () {
        create_gitattributes $attr &&
        git config core.autocrlf $crlf &&
        pfx=eol_${eol}_crlf_${crlf}_attr_${attr}_ &&
-       src=crlf_false_attr__ &&
        for f in LF CRLF LF_mix_CR CRLF_mix_LF LF_nul
        do
-               rm $src$f.txt &&
+               rm crlf_false_attr__$f.txt &&
                if test -z "$eol"; then
-                       git checkout $src$f.txt
+                       git checkout crlf_false_attr__$f.txt
                else
-                       git -c core.eol=$eol checkout $src$f.txt
+                       git -c core.eol=$eol checkout crlf_false_attr__$f.txt
                fi
        done
 
+       test_expect_success "ls-files --eol $lfname ${pfx}LF.txt" '
+               test_when_finished "rm expect actual" &&
+               sort <<-EOF >expect &&
+               i/crlf w/$(stats_ascii $crlfname) crlf_false_attr__CRLF.txt
+               i/mixed w/$(stats_ascii $lfmixcrlf) crlf_false_attr__CRLF_mix_LF.txt
+               i/lf w/$(stats_ascii $lfname) crlf_false_attr__LF.txt
+               i/-text w/$(stats_ascii $lfmixcr) crlf_false_attr__LF_mix_CR.txt
+               i/-text w/$(stats_ascii $crlfnul) crlf_false_attr__CRLF_nul.txt
+               i/-text w/$(stats_ascii $crlfnul) crlf_false_attr__LF_nul.txt
+               EOF
+               git ls-files --eol crlf_false_attr__* |
+               sed -e "s!attr/[^       ]*!!g" -e "s/   / /g" -e "s/  */ /g" |
+               sort >actual &&
+               test_cmp expect actual
+       '
        test_expect_success "checkout core.eol=$eol core.autocrlf=$crlf gitattributes=$attr file=LF" "
-               compare_ws_file $pfx $lfname    ${src}LF.txt
+               compare_ws_file $pfx $lfname    crlf_false_attr__LF.txt
        "
        test_expect_success "checkout core.eol=$eol core.autocrlf=$crlf gitattributes=$attr file=CRLF" "
-               compare_ws_file $pfx $crlfname  ${src}CRLF.txt
+               compare_ws_file $pfx $crlfname  crlf_false_attr__CRLF.txt
        "
        test_expect_success "checkout core.eol=$eol core.autocrlf=$crlf gitattributes=$attr file=CRLF_mix_LF" "
-               compare_ws_file $pfx $lfmixcrlf ${src}CRLF_mix_LF.txt
+               compare_ws_file $pfx $lfmixcrlf crlf_false_attr__CRLF_mix_LF.txt
        "
        test_expect_success "checkout core.eol=$eol core.autocrlf=$crlf gitattributes=$attr file=LF_mix_CR" "
-               compare_ws_file $pfx $lfmixcr   ${src}LF_mix_CR.txt
+               compare_ws_file $pfx $lfmixcr   crlf_false_attr__LF_mix_CR.txt
        "
        test_expect_success "checkout core.eol=$eol core.autocrlf=$crlf gitattributes=$attr file=LF_nul" "
-               compare_ws_file $pfx $crlfnul   ${src}LF_nul.txt
+               compare_ws_file $pfx $crlfnul   crlf_false_attr__LF_nul.txt
        "
 }
 
-#######
+# Test control characters
+# NUL SOH CR EOF==^Z
+test_expect_success 'ls-files --eol -o Text/Binary' '
+       test_when_finished "rm expect actual TeBi_*" &&
+       STRT=AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA &&
+       STR=$STRT$STRT$STRT$STRT &&
+       printf "${STR}BBB\001" >TeBi_127_S &&
+       printf "${STR}BBBB\001">TeBi_128_S &&
+       printf "${STR}BBB\032" >TeBi_127_E &&
+       printf "\032${STR}BBB" >TeBi_E_127 &&
+       printf "${STR}BBBB\000">TeBi_128_N &&
+       printf "${STR}BBB\012">TeBi_128_L &&
+       printf "${STR}BBB\015">TeBi_127_C &&
+       printf "${STR}BB\015\012" >TeBi_126_CL &&
+       printf "${STR}BB\015\012\015" >TeBi_126_CLC &&
+       sort <<-\EOF >expect &&
+       i/ w/-text TeBi_127_S
+       i/ w/none TeBi_128_S
+       i/ w/none TeBi_127_E
+       i/ w/-text TeBi_E_127
+       i/ w/-text TeBi_128_N
+       i/ w/lf TeBi_128_L
+       i/ w/-text TeBi_127_C
+       i/ w/crlf TeBi_126_CL
+       i/ w/-text TeBi_126_CLC
+       EOF
+       git ls-files --eol -o |
+       sed -n -e "/TeBi_/{s!attr/[     ]*!!g
+       s!      ! !g
+       s!  *! !g
+       p
+       }" | sort >actual &&
+       test_cmp expect actual
+'
+
 test_expect_success 'setup master' '
        echo >.gitattributes &&
        git checkout -b master &&
@@ -480,4 +544,19 @@ checkout_files    native  true  "lf"      LF    CRLF  CRLF_mix_LF  LF_mix_CR
 checkout_files    native  false "crlf"    CRLF  CRLF  CRLF         CRLF_mix_CR  CRLF_nul
 checkout_files    native  true  "crlf"    CRLF  CRLF  CRLF         CRLF_mix_CR  CRLF_nul
 
+# Should be the last test case: remove some files from the worktree
+test_expect_success 'ls-files --eol -d -z' '
+       rm crlf_false_attr__CRLF.txt crlf_false_attr__CRLF_mix_LF.txt crlf_false_attr__LF.txt .gitattributes &&
+       cat >expect <<-\EOF &&
+       i/crlf w/ crlf_false_attr__CRLF.txt
+       i/lf w/ .gitattributes
+       i/lf w/ crlf_false_attr__LF.txt
+       i/mixed w/ crlf_false_attr__CRLF_mix_LF.txt
+       EOF
+       git ls-files --eol -d |
+       sed -e "s!attr/[^       ]*!!g" -e "s/   / /g" -e "s/  */ /g" |
+       sort >actual &&
+       test_cmp expect actual
+'
+
 test_done