sha1_file: add for_each iterators for loose and packed objects
[gitweb.git] / sha1_file.c
index 3f70b1d86aaa8e0648d8a3c6ebb6aca701ae7475..55c65b7ef69a288c99051506485d1bd97d09d479 100644 (file)
@@ -350,7 +350,7 @@ static void link_alt_odb_entries(const char *alt, int len, int sep,
                return;
        }
 
-       strbuf_addstr(&objdirbuf, absolute_path(get_object_directory()));
+       strbuf_add_absolute_path(&objdirbuf, get_object_directory());
        normalize_path_copy(objdirbuf.buf, objdirbuf.buf);
 
        alt_copy = xmemdupz(alt, len);
@@ -412,14 +412,18 @@ void add_to_alternates_file(const char *reference)
                link_alt_odb_entries(alt, strlen(alt), '\n', NULL, 0);
 }
 
-void foreach_alt_odb(alt_odb_fn fn, void *cb)
+int foreach_alt_odb(alt_odb_fn fn, void *cb)
 {
        struct alternate_object_database *ent;
+       int r = 0;
 
        prepare_alt_odb();
-       for (ent = alt_odb_list; ent; ent = ent->next)
-               if (fn(ent, cb))
-                       return;
+       for (ent = alt_odb_list; ent; ent = ent->next) {
+               r = fn(ent, cb);
+               if (r)
+                       break;
+       }
+       return r;
 }
 
 void prepare_alt_odb(void)
@@ -663,10 +667,26 @@ void release_pack_memory(size_t need)
                ; /* nothing */
 }
 
+static void mmap_limit_check(size_t length)
+{
+       static size_t limit = 0;
+       if (!limit) {
+               limit = git_env_ulong("GIT_MMAP_LIMIT", 0);
+               if (!limit)
+                       limit = SIZE_MAX;
+       }
+       if (length > limit)
+               die("attempting to mmap %"PRIuMAX" over limit %"PRIuMAX,
+                   (uintmax_t)length, (uintmax_t)limit);
+}
+
 void *xmmap(void *start, size_t length,
        int prot, int flags, int fd, off_t offset)
 {
-       void *ret = mmap(start, length, prot, flags, fd, offset);
+       void *ret;
+
+       mmap_limit_check(length);
+       ret = mmap(start, length, prot, flags, fd, offset);
        if (ret == MAP_FAILED) {
                if (!length)
                        return NULL;
@@ -1923,7 +1943,9 @@ static void *unpack_compressed_entry(struct packed_git *p,
        git_zstream stream;
        unsigned char *buffer, *in;
 
-       buffer = xmallocz(size);
+       buffer = xmallocz_gently(size);
+       if (!buffer)
+               return NULL;
        memset(&stream, 0, sizeof(stream));
        stream.next_out = buffer;
        stream.avail_out = size + 1;
@@ -3074,6 +3096,29 @@ static int index_mem(unsigned char *sha1, void *buf, size_t size,
        return ret;
 }
 
+static int index_stream_convert_blob(unsigned char *sha1, int fd,
+                                    const char *path, unsigned flags)
+{
+       int ret;
+       const int write_object = flags & HASH_WRITE_OBJECT;
+       struct strbuf sbuf = STRBUF_INIT;
+
+       assert(path);
+       assert(would_convert_to_git_filter_fd(path));
+
+       convert_to_git_filter_fd(path, fd, &sbuf,
+                                write_object ? safe_crlf : SAFE_CRLF_FALSE);
+
+       if (write_object)
+               ret = write_sha1_file(sbuf.buf, sbuf.len, typename(OBJ_BLOB),
+                                     sha1);
+       else
+               ret = hash_sha1_file(sbuf.buf, sbuf.len, typename(OBJ_BLOB),
+                                    sha1);
+       strbuf_release(&sbuf);
+       return ret;
+}
+
 static int index_pipe(unsigned char *sha1, int fd, enum object_type type,
                      const char *path, unsigned flags)
 {
@@ -3139,15 +3184,22 @@ int index_fd(unsigned char *sha1, int fd, struct stat *st,
             enum object_type type, const char *path, unsigned flags)
 {
        int ret;
-       size_t size = xsize_t(st->st_size);
 
-       if (!S_ISREG(st->st_mode))
+       /*
+        * Call xsize_t() only when needed to avoid potentially unnecessary
+        * die() for large files.
+        */
+       if (type == OBJ_BLOB && path && would_convert_to_git_filter_fd(path))
+               ret = index_stream_convert_blob(sha1, fd, path, flags);
+       else if (!S_ISREG(st->st_mode))
                ret = index_pipe(sha1, fd, type, path, flags);
-       else if (size <= big_file_threshold || type != OBJ_BLOB ||
-                (path && would_convert_to_git(path, NULL, 0, 0)))
-               ret = index_core(sha1, fd, size, type, path, flags);
+       else if (st->st_size <= big_file_threshold || type != OBJ_BLOB ||
+                (path && would_convert_to_git(path)))
+               ret = index_core(sha1, fd, xsize_t(st->st_size), type, path,
+                                flags);
        else
-               ret = index_stream(sha1, fd, size, type, path, flags);
+               ret = index_stream(sha1, fd, xsize_t(st->st_size), type, path,
+                                  flags);
        close(fd);
        return ret;
 }
@@ -3212,3 +3264,149 @@ void assert_sha1_type(const unsigned char *sha1, enum object_type expect)
                die("%s is not a valid '%s' object", sha1_to_hex(sha1),
                    typename(expect));
 }
+
+static int for_each_file_in_obj_subdir(int subdir_nr,
+                                      struct strbuf *path,
+                                      each_loose_object_fn obj_cb,
+                                      each_loose_cruft_fn cruft_cb,
+                                      each_loose_subdir_fn subdir_cb,
+                                      void *data)
+{
+       size_t baselen = path->len;
+       DIR *dir = opendir(path->buf);
+       struct dirent *de;
+       int r = 0;
+
+       if (!dir) {
+               if (errno == ENOENT)
+                       return 0;
+               return error("unable to open %s: %s", path->buf, strerror(errno));
+       }
+
+       while ((de = readdir(dir))) {
+               if (is_dot_or_dotdot(de->d_name))
+                       continue;
+
+               strbuf_setlen(path, baselen);
+               strbuf_addf(path, "/%s", de->d_name);
+
+               if (strlen(de->d_name) == 38)  {
+                       char hex[41];
+                       unsigned char sha1[20];
+
+                       snprintf(hex, sizeof(hex), "%02x%s",
+                                subdir_nr, de->d_name);
+                       if (!get_sha1_hex(hex, sha1)) {
+                               if (obj_cb) {
+                                       r = obj_cb(sha1, path->buf, data);
+                                       if (r)
+                                               break;
+                               }
+                               continue;
+                       }
+               }
+
+               if (cruft_cb) {
+                       r = cruft_cb(de->d_name, path->buf, data);
+                       if (r)
+                               break;
+               }
+       }
+       strbuf_setlen(path, baselen);
+
+       if (!r && subdir_cb)
+               r = subdir_cb(subdir_nr, path->buf, data);
+
+       closedir(dir);
+       return r;
+}
+
+int for_each_loose_file_in_objdir(const char *path,
+                           each_loose_object_fn obj_cb,
+                           each_loose_cruft_fn cruft_cb,
+                           each_loose_subdir_fn subdir_cb,
+                           void *data)
+{
+       struct strbuf buf = STRBUF_INIT;
+       size_t baselen;
+       int r = 0;
+       int i;
+
+       strbuf_addstr(&buf, path);
+       strbuf_addch(&buf, '/');
+       baselen = buf.len;
+
+       for (i = 0; i < 256; i++) {
+               strbuf_addf(&buf, "%02x", i);
+               r = for_each_file_in_obj_subdir(i, &buf, obj_cb, cruft_cb,
+                                               subdir_cb, data);
+               strbuf_setlen(&buf, baselen);
+               if (r)
+                       break;
+       }
+
+       strbuf_release(&buf);
+       return r;
+}
+
+struct loose_alt_odb_data {
+       each_loose_object_fn *cb;
+       void *data;
+};
+
+static int loose_from_alt_odb(struct alternate_object_database *alt,
+                             void *vdata)
+{
+       struct loose_alt_odb_data *data = vdata;
+       return for_each_loose_file_in_objdir(alt->base,
+                                            data->cb, NULL, NULL,
+                                            data->data);
+}
+
+int for_each_loose_object(each_loose_object_fn cb, void *data)
+{
+       struct loose_alt_odb_data alt;
+       int r;
+
+       r = for_each_loose_file_in_objdir(get_object_directory(),
+                                         cb, NULL, NULL, data);
+       if (r)
+               return r;
+
+       alt.cb = cb;
+       alt.data = data;
+       return foreach_alt_odb(loose_from_alt_odb, &alt);
+}
+
+static int for_each_object_in_pack(struct packed_git *p, each_packed_object_fn cb, void *data)
+{
+       uint32_t i;
+       int r = 0;
+
+       for (i = 0; i < p->num_objects; i++) {
+               const unsigned char *sha1 = nth_packed_object_sha1(p, i);
+
+               if (!sha1)
+                       return error("unable to get sha1 of object %u in %s",
+                                    i, p->pack_name);
+
+               r = cb(sha1, p, i, data);
+               if (r)
+                       break;
+       }
+       return r;
+}
+
+int for_each_packed_object(each_packed_object_fn cb, void *data)
+{
+       struct packed_git *p;
+       int r = 0;
+
+       prepare_packed_git();
+       for (p = packed_git; p; p = p->next) {
+               r = for_each_object_in_pack(p, cb, data);
+               if (r)
+                       break;
+       }
+       return r;
+}