Merge branch 'master' of git://repo.or.cz/git/fastimport
[gitweb.git] / convert.c
index 898bfe3eb219618e746afbe02eb4a8756eccc6aa..ad106ef35fb0729709b243d6292980f15cd1b0a5 100644 (file)
--- a/convert.c
+++ b/convert.c
@@ -1,4 +1,6 @@
 #include "cache.h"
+#include "attr.h"
+
 /*
  * convert.c - convert a file when checking it out and checking it in.
  *
@@ -8,6 +10,11 @@
  * translation when the "auto_crlf" option is set.
  */
 
+#define CRLF_GUESS     (-1)
+#define CRLF_BINARY    0
+#define CRLF_TEXT      1
+#define CRLF_INPUT     2
+
 struct text_stat {
        /* CR, LF and CRLF counts */
        unsigned cr, lf, crlf;
@@ -72,115 +79,171 @@ static int is_binary(unsigned long size, struct text_stat *stats)
        return 0;
 }
 
-int convert_to_git(const char *path, char **bufp, unsigned long *sizep)
+static char *crlf_to_git(const char *path, const char *src, unsigned long *sizep, int action)
 {
-       char *buffer, *nbuf;
+       char *buffer, *dst;
        unsigned long size, nsize;
        struct text_stat stats;
 
-       /*
-        * FIXME! Other pluggable conversions should go here,
-        * based on filename patterns. Right now we just do the
-        * stupid auto-CRLF one.
-        */
-       if (!auto_crlf)
-               return 0;
+       if ((action == CRLF_BINARY) || (action == CRLF_GUESS && !auto_crlf))
+               return NULL;
 
        size = *sizep;
        if (!size)
-               return 0;
-       buffer = *bufp;
+               return NULL;
 
-       gather_stats(buffer, size, &stats);
+       gather_stats(src, size, &stats);
 
        /* No CR? Nothing to convert, regardless. */
        if (!stats.cr)
-               return 0;
-
-       /*
-        * We're currently not going to even try to convert stuff
-        * that has bare CR characters. Does anybody do that crazy
-        * stuff?
-        */
-       if (stats.cr != stats.crlf)
-               return 0;
-
-       /*
-        * And add some heuristics for binary vs text, of course...
-        */
-       if (is_binary(size, &stats))
-               return 0;
+               return NULL;
+
+       if (action == CRLF_GUESS) {
+               /*
+                * We're currently not going to even try to convert stuff
+                * that has bare CR characters. Does anybody do that crazy
+                * stuff?
+                */
+               if (stats.cr != stats.crlf)
+                       return NULL;
+
+               /*
+                * And add some heuristics for binary vs text, of course...
+                */
+               if (is_binary(size, &stats))
+                       return NULL;
+       }
 
        /*
-        * Ok, allocate a new buffer, fill it in, and return true
-        * to let the caller know that we switched buffers on it.
+        * Ok, allocate a new buffer, fill it in, and return it
+        * to let the caller know that we switched buffers.
         */
        nsize = size - stats.crlf;
-       nbuf = xmalloc(nsize);
-       *bufp = nbuf;
+       buffer = xmalloc(nsize);
        *sizep = nsize;
-       do {
-               unsigned char c = *buffer++;
-               if (c != '\r')
-                       *nbuf++ = c;
-       } while (--size);
 
-       return 1;
+       dst = buffer;
+       if (action == CRLF_GUESS) {
+               /*
+                * If we guessed, we already know we rejected a file with
+                * lone CR, and we can strip a CR without looking at what
+                * follow it.
+                */
+               do {
+                       unsigned char c = *src++;
+                       if (c != '\r')
+                               *dst++ = c;
+               } while (--size);
+       } else {
+               do {
+                       unsigned char c = *src++;
+                       if (! (c == '\r' && (1 < size && *src == '\n')))
+                               *dst++ = c;
+               } while (--size);
+       }
+
+       return buffer;
 }
 
-int convert_to_working_tree(const char *path, char **bufp, unsigned long *sizep)
+static char *crlf_to_worktree(const char *path, const char *src, unsigned long *sizep, int action)
 {
-       char *buffer, *nbuf;
+       char *buffer, *dst;
        unsigned long size, nsize;
        struct text_stat stats;
        unsigned char last;
 
-       /*
-        * FIXME! Other pluggable conversions should go here,
-        * based on filename patterns. Right now we just do the
-        * stupid auto-CRLF one.
-        */
-       if (auto_crlf <= 0)
-               return 0;
+       if ((action == CRLF_BINARY) || (action == CRLF_INPUT) ||
+           (action == CRLF_GUESS && auto_crlf <= 0))
+               return NULL;
 
        size = *sizep;
        if (!size)
-               return 0;
-       buffer = *bufp;
+               return NULL;
 
-       gather_stats(buffer, size, &stats);
+       gather_stats(src, size, &stats);
 
        /* No LF? Nothing to convert, regardless. */
        if (!stats.lf)
-               return 0;
+               return NULL;
 
        /* Was it already in CRLF format? */
        if (stats.lf == stats.crlf)
-               return 0;
+               return NULL;
 
-       /* If we have any bare CR characters, we're not going to touch it */
-       if (stats.cr != stats.crlf)
-               return 0;
+       if (action == CRLF_GUESS) {
+               /* If we have any bare CR characters, we're not going to touch it */
+               if (stats.cr != stats.crlf)
+                       return NULL;
 
-       if (is_binary(size, &stats))
-               return 0;
+               if (is_binary(size, &stats))
+                       return NULL;
+       }
 
        /*
-        * Ok, allocate a new buffer, fill it in, and return true
-        * to let the caller know that we switched buffers on it.
+        * Ok, allocate a new buffer, fill it in, and return it
+        * to let the caller know that we switched buffers.
         */
        nsize = size + stats.lf - stats.crlf;
-       nbuf = xmalloc(nsize);
-       *bufp = nbuf;
+       buffer = xmalloc(nsize);
        *sizep = nsize;
        last = 0;
+
+       dst = buffer;
        do {
-               unsigned char c = *buffer++;
+               unsigned char c = *src++;
                if (c == '\n' && last != '\r')
-                       *nbuf++ = '\r';
-               *nbuf++ = c;
+                       *dst++ = '\r';
+               *dst++ = c;
                last = c;
        } while (--size);
 
-       return 1;
+       return buffer;
+}
+
+static void setup_convert_check(struct git_attr_check *check)
+{
+       static struct git_attr *attr_crlf;
+
+       if (!attr_crlf)
+               attr_crlf = git_attr("crlf", 4);
+       check->attr = attr_crlf;
+}
+
+static int git_path_check_crlf(const char *path, struct git_attr_check *check)
+{
+       const char *value = check->value;
+
+       if (ATTR_TRUE(value))
+               return CRLF_TEXT;
+       else if (ATTR_FALSE(value))
+               return CRLF_BINARY;
+       else if (ATTR_UNSET(value))
+               ;
+       else if (!strcmp(value, "input"))
+               return CRLF_INPUT;
+       return CRLF_GUESS;
+}
+
+char *convert_to_git(const char *path, const char *src, unsigned long *sizep)
+{
+       struct git_attr_check check[1];
+       int crlf = CRLF_GUESS;
+
+       setup_convert_check(check);
+       if (!git_checkattr(path, 1, check)) {
+               crlf = git_path_check_crlf(path, check);
+       }
+       return crlf_to_git(path, src, sizep, crlf);
+}
+
+char *convert_to_working_tree(const char *path, const char *src, unsigned long *sizep)
+{
+       struct git_attr_check check[1];
+       int crlf = CRLF_GUESS;
+
+       setup_convert_check(check);
+       if (!git_checkattr(path, 1, check)) {
+               crlf = git_path_check_crlf(path, check);
+       }
+       return crlf_to_worktree(path, src, sizep, crlf);
 }