status: don't suggest "git rm" or "git add" if not appropriate
[gitweb.git] / sha1_file.c
index 956422ba4a5df5f46caaf85f10e4c85321439272..4ccaf7ac197c28400eddd496abcfc725528ca32b 100644 (file)
@@ -18,6 +18,8 @@
 #include "refs.h"
 #include "pack-revindex.h"
 #include "sha1-lookup.h"
+#include "bulk-checkin.h"
+#include "streaming.h"
 
 #ifndef O_NOATIME
 #if defined(__linux__) && (defined(__i386__) || defined(__PPC__))
@@ -53,6 +55,8 @@ static struct cached_object empty_tree = {
        0
 };
 
+static struct packed_git *last_found_pack;
+
 static struct cached_object *find_cached_object(const unsigned char *sha1)
 {
        int i;
@@ -225,7 +229,6 @@ char *sha1_pack_index_name(const unsigned char *sha1)
 struct alternate_object_database *alt_odb_list;
 static struct alternate_object_database **alt_odb_tail;
 
-static void read_info_alternates(const char * alternates, int depth);
 static int git_open_noatime(const char *name);
 
 /*
@@ -350,7 +353,7 @@ static void link_alt_odb_entries(const char *alt, const char *ep, int sep,
        }
 }
 
-static void read_info_alternates(const char * relative_base, int depth)
+void read_info_alternates(const char * relative_base, int depth)
 {
        char *map;
        size_t mapsz;
@@ -719,6 +722,8 @@ void free_pack_by_name(const char *pack_name)
                        close_pack_index(p);
                        free(p->bad_object_sha1);
                        *pp = p->next;
+                       if (last_found_pack == p)
+                               last_found_pack = NULL;
                        free(p);
                        return;
                }
@@ -1141,10 +1146,47 @@ static const struct packed_git *has_packed_and_bad(const unsigned char *sha1)
        return NULL;
 }
 
-int check_sha1_signature(const unsigned char *sha1, void *map, unsigned long size, const char *type)
+/*
+ * With an in-core object data in "map", rehash it to make sure the
+ * object name actually matches "sha1" to detect object corruption.
+ * With "map" == NULL, try reading the object named with "sha1" using
+ * the streaming interface and rehash it to do the same.
+ */
+int check_sha1_signature(const unsigned char *sha1, void *map,
+                        unsigned long size, const char *type)
 {
        unsigned char real_sha1[20];
-       hash_sha1_file(map, size, type, real_sha1);
+       enum object_type obj_type;
+       struct git_istream *st;
+       git_SHA_CTX c;
+       char hdr[32];
+       int hdrlen;
+
+       if (map) {
+               hash_sha1_file(map, size, type, real_sha1);
+               return hashcmp(sha1, real_sha1) ? -1 : 0;
+       }
+
+       st = open_istream(sha1, &obj_type, &size, NULL);
+       if (!st)
+               return -1;
+
+       /* Generate the header */
+       hdrlen = sprintf(hdr, "%s %lu", typename(obj_type), size) + 1;
+
+       /* Sha1.. */
+       git_SHA1_Init(&c);
+       git_SHA1_Update(&c, hdr, hdrlen);
+       for (;;) {
+               char buf[1024 * 16];
+               ssize_t readlen = read_istream(st, buf, sizeof(buf));
+
+               if (!readlen)
+                       break;
+               git_SHA1_Update(&c, buf, readlen);
+       }
+       git_SHA1_Final(real_sha1, &c);
+       close_istream(st);
        return hashcmp(sha1, real_sha1) ? -1 : 0;
 }
 
@@ -1201,6 +1243,11 @@ void *map_sha1_file(const unsigned char *sha1, unsigned long *size)
 
                if (!fstat(fd, &st)) {
                        *size = xsize_t(st.st_size);
+                       if (!*size) {
+                               /* mmap() is forbidden on empty files */
+                               error("object file %s is empty", sha1_file_name(sha1));
+                               return NULL;
+                       }
                        map = xmmap(NULL, *size, PROT_READ, MAP_PRIVATE, fd, 0);
                }
                close(fd);
@@ -2009,54 +2056,58 @@ int is_pack_valid(struct packed_git *p)
        return !open_packed_git(p);
 }
 
+static int fill_pack_entry(const unsigned char *sha1,
+                          struct pack_entry *e,
+                          struct packed_git *p)
+{
+       off_t offset;
+
+       if (p->num_bad_objects) {
+               unsigned i;
+               for (i = 0; i < p->num_bad_objects; i++)
+                       if (!hashcmp(sha1, p->bad_object_sha1 + 20 * i))
+                               return 0;
+       }
+
+       offset = find_pack_entry_one(sha1, p);
+       if (!offset)
+               return 0;
+
+       /*
+        * We are about to tell the caller where they can locate the
+        * requested object.  We better make sure the packfile is
+        * still here and can be accessed before supplying that
+        * answer, as it may have been deleted since the index was
+        * loaded!
+        */
+       if (!is_pack_valid(p)) {
+               warning("packfile %s cannot be accessed", p->pack_name);
+               return 0;
+       }
+       e->offset = offset;
+       e->p = p;
+       hashcpy(e->sha1, sha1);
+       return 1;
+}
+
 static int find_pack_entry(const unsigned char *sha1, struct pack_entry *e)
 {
-       static struct packed_git *last_found = (void *)1;
        struct packed_git *p;
-       off_t offset;
 
        prepare_packed_git();
        if (!packed_git)
                return 0;
-       p = (last_found == (void *)1) ? packed_git : last_found;
 
-       do {
-               if (p->num_bad_objects) {
-                       unsigned i;
-                       for (i = 0; i < p->num_bad_objects; i++)
-                               if (!hashcmp(sha1, p->bad_object_sha1 + 20 * i))
-                                       goto next;
-               }
+       if (last_found_pack && fill_pack_entry(sha1, e, last_found_pack))
+               return 1;
 
-               offset = find_pack_entry_one(sha1, p);
-               if (offset) {
-                       /*
-                        * We are about to tell the caller where they can
-                        * locate the requested object.  We better make
-                        * sure the packfile is still here and can be
-                        * accessed before supplying that answer, as
-                        * it may have been deleted since the index
-                        * was loaded!
-                        */
-                       if (!is_pack_valid(p)) {
-                               warning("packfile %s cannot be accessed", p->pack_name);
-                               goto next;
-                       }
-                       e->offset = offset;
-                       e->p = p;
-                       hashcpy(e->sha1, sha1);
-                       last_found = p;
-                       return 1;
-               }
+       for (p = packed_git; p; p = p->next) {
+               if (p == last_found_pack || !fill_pack_entry(sha1, e, p))
+                       continue;
 
-               next:
-               if (p == last_found)
-                       p = packed_git;
-               else
-                       p = p->next;
-               if (p == last_found)
-                       p = p->next;
-       } while (p);
+               last_found_pack = p;
+               return 1;
+       }
        return 0;
 }
 
@@ -2365,7 +2416,7 @@ int move_temp_to_file(const char *tmpfile, const char *filename)
        unlink_or_warn(tmpfile);
        if (ret) {
                if (ret != EEXIST) {
-                       return error("unable to write sha1 filename %s: %s\n", filename, strerror(ret));
+                       return error("unable to write sha1 filename %s: %s", filename, strerror(ret));
                }
                /* FIXME!!! Collision check here ? */
        }
@@ -2451,15 +2502,15 @@ static int write_loose_object(const unsigned char *sha1, char *hdr, int hdrlen,
        git_SHA_CTX c;
        unsigned char parano_sha1[20];
        char *filename;
-       static char tmpfile[PATH_MAX];
+       static char tmp_file[PATH_MAX];
 
        filename = sha1_file_name(sha1);
-       fd = create_tmpfile(tmpfile, sizeof(tmpfile), filename);
+       fd = create_tmpfile(tmp_file, sizeof(tmp_file), filename);
        if (fd < 0) {
                if (errno == EACCES)
-                       return error("insufficient permission for adding an object to repository database %s\n", get_object_directory());
+                       return error("insufficient permission for adding an object to repository database %s", get_object_directory());
                else
-                       return error("unable to create temporary sha1 filename %s: %s\n", tmpfile, strerror(errno));
+                       return error("unable to create temporary file: %s", strerror(errno));
        }
 
        /* Set it up */
@@ -2504,12 +2555,12 @@ static int write_loose_object(const unsigned char *sha1, char *hdr, int hdrlen,
                struct utimbuf utb;
                utb.actime = mtime;
                utb.modtime = mtime;
-               if (utime(tmpfile, &utb) < 0)
+               if (utime(tmp_file, &utb) < 0)
                        warning("failed utime() on %s: %s",
-                               tmpfile, strerror(errno));
+                               tmp_file, strerror(errno));
        }
 
-       return move_temp_to_file(tmpfile, filename);
+       return move_temp_to_file(tmp_file, filename);
 }
 
 int write_sha1_file(const void *buf, unsigned long len, const char *type, unsigned char *returnsha1)
@@ -2680,82 +2731,25 @@ static int index_core(unsigned char *sha1, int fd, size_t size,
 }
 
 /*
- * This creates one packfile per large blob, because the caller
- * immediately wants the result sha1, and fast-import can report the
- * object name via marks mechanism only by closing the created
- * packfile.
+ * This creates one packfile per large blob unless bulk-checkin
+ * machinery is "plugged".
  *
  * This also bypasses the usual "convert-to-git" dance, and that is on
  * purpose. We could write a streaming version of the converting
  * functions and insert that before feeding the data to fast-import
- * (or equivalent in-core API described above), but the primary
- * motivation for trying to stream from the working tree file and to
- * avoid mmaping it in core is to deal with large binary blobs, and
- * by definition they do _not_ want to get any conversion.
+ * (or equivalent in-core API described above). However, that is
+ * somewhat complicated, as we do not know the size of the filter
+ * result, which we need to know beforehand when writing a git object.
+ * Since the primary motivation for trying to stream from the working
+ * tree file and to avoid mmaping it in core is to deal with large
+ * binary blobs, they generally do not want to get any conversion, and
+ * callers should avoid this code path when filters are requested.
  */
 static int index_stream(unsigned char *sha1, int fd, size_t size,
                        enum object_type type, const char *path,
                        unsigned flags)
 {
-       struct child_process fast_import;
-       char export_marks[512];
-       const char *argv[] = { "fast-import", "--quiet", export_marks, NULL };
-       char tmpfile[512];
-       char fast_import_cmd[512];
-       char buf[512];
-       int len, tmpfd;
-
-       strcpy(tmpfile, git_path("hashstream_XXXXXX"));
-       tmpfd = git_mkstemp_mode(tmpfile, 0600);
-       if (tmpfd < 0)
-               die_errno("cannot create tempfile: %s", tmpfile);
-       if (close(tmpfd))
-               die_errno("cannot close tempfile: %s", tmpfile);
-       sprintf(export_marks, "--export-marks=%s", tmpfile);
-
-       memset(&fast_import, 0, sizeof(fast_import));
-       fast_import.in = -1;
-       fast_import.argv = argv;
-       fast_import.git_cmd = 1;
-       if (start_command(&fast_import))
-               die_errno("index-stream: git fast-import failed");
-
-       len = sprintf(fast_import_cmd, "blob\nmark :1\ndata %lu\n",
-                     (unsigned long) size);
-       write_or_whine(fast_import.in, fast_import_cmd, len,
-                      "index-stream: feeding fast-import");
-       while (size) {
-               char buf[10240];
-               size_t sz = size < sizeof(buf) ? size : sizeof(buf);
-               ssize_t actual;
-
-               actual = read_in_full(fd, buf, sz);
-               if (actual < 0)
-                       die_errno("index-stream: reading input");
-               if (write_in_full(fast_import.in, buf, actual) != actual)
-                       die_errno("index-stream: feeding fast-import");
-               size -= actual;
-       }
-       if (close(fast_import.in))
-               die_errno("index-stream: closing fast-import");
-       if (finish_command(&fast_import))
-               die_errno("index-stream: finishing fast-import");
-
-       tmpfd = open(tmpfile, O_RDONLY);
-       if (tmpfd < 0)
-               die_errno("index-stream: cannot open fast-import mark");
-       len = read(tmpfd, buf, sizeof(buf));
-       if (len < 0)
-               die_errno("index-stream: reading fast-import mark");
-       if (close(tmpfd) < 0)
-               die_errno("index-stream: closing fast-import mark");
-       if (unlink(tmpfile))
-               die_errno("index-stream: unlinking fast-import mark");
-       if (len != 44 ||
-           memcmp(":1 ", buf, 3) ||
-           get_sha1_hex(buf + 3, sha1))
-               die_errno("index-stream: unexpected fast-import mark: <%s>", buf);
-       return 0;
+       return index_bulk_checkin(sha1, fd, size, type, path, flags);
 }
 
 int index_fd(unsigned char *sha1, int fd, struct stat *st,
@@ -2766,7 +2760,8 @@ int index_fd(unsigned char *sha1, int fd, struct stat *st,
 
        if (!S_ISREG(st->st_mode))
                ret = index_pipe(sha1, fd, type, path, flags);
-       else if (size <= big_file_threshold || type != OBJ_BLOB)
+       else if (size <= big_file_threshold || type != OBJ_BLOB ||
+                (path && would_convert_to_git(path, NULL, 0, 0)))
                ret = index_core(sha1, fd, size, type, path, flags);
        else
                ret = index_stream(sha1, fd, size, type, path, flags);