Merge branch 'mt/dir-iterator-updates'
author Junio C Hamano <gitster@pobox.com>
Thu, 25 Jul 2019 20:59:22 +0000 (13:59 -0700)
committer Junio C Hamano <gitster@pobox.com>
Thu, 25 Jul 2019 20:59:22 +0000 (13:59 -0700)
Adjust the dir-iterator API and apply it to the local clone
optimization codepath.

* mt/dir-iterator-updates:
clone: replace strcmp by fspathcmp
clone: use dir-iterator to avoid explicit dir traversal
clone: extract function from copy_or_link_directory
clone: copy hidden paths at local clone
dir-iterator: add flags parameter to dir_iterator_begin
dir-iterator: refactor state machine model
dir-iterator: use warning_errno when possible
dir-iterator: add tests for dir-iterator API
clone: better handle symlinked files at .git/objects/
clone: test for our behavior on odd objects/* content

Makefile
builtin/clone.c
dir-iterator.c
dir-iterator.h
refs/files-backend.c
t/helper/test-dir-iterator.c [new file with mode: 0644]
t/helper/test-tool.c
t/helper/test-tool.h
t/t0066-dir-iterator.sh [new file with mode: 0755]
t/t5604-clone-reference.sh
index b11cdd4fe79234b57211d236930b0798416b8e6c..bd246f2989575a72758fd3765659f61a8aa58c8c 100644 (file)
--- a/Makefile
+++ b/Makefile
@@ -704,6 +704,7 @@ TEST_BUILTINS_OBJS += test-config.o
 TEST_BUILTINS_OBJS += test-ctype.o
 TEST_BUILTINS_OBJS += test-date.o
 TEST_BUILTINS_OBJS += test-delta.o
+TEST_BUILTINS_OBJS += test-dir-iterator.o
 TEST_BUILTINS_OBJS += test-drop-caches.o
 TEST_BUILTINS_OBJS += test-dump-cache-tree.o
 TEST_BUILTINS_OBJS += test-dump-fsmonitor.o
index a4fe72879d43e4e42d6cbb5a4dada2f7a111b872..f665b28ccccfacaf5dfe84b7f94081e1afacdd49 100644 (file)
@@ -23,6 +23,8 @@
 #include "transport.h"
 #include "strbuf.h"
 #include "dir.h"
+#include "dir-iterator.h"
+#include "iterator.h"
 #include "sigchain.h"
 #include "branch.h"
 #include "remote.h"
@@ -394,50 +396,55 @@ static void copy_alternates(struct strbuf *src, const char *src_repo)
        fclose(in);
 }
 
+static void mkdir_if_missing(const char *pathname, mode_t mode)
+{
+       struct stat st;
+
+       if (!mkdir(pathname, mode))
+               return;
+
+       if (errno != EEXIST)
+               die_errno(_("failed to create directory '%s'"), pathname);
+       else if (stat(pathname, &st))
+               die_errno(_("failed to stat '%s'"), pathname);
+       else if (!S_ISDIR(st.st_mode))
+               die(_("%s exists and is not a directory"), pathname);
+}
+
 static void copy_or_link_directory(struct strbuf *src, struct strbuf *dest,
-                                  const char *src_repo, int src_baselen)
+                                  const char *src_repo)
 {
-       struct dirent *de;
-       struct stat buf;
        int src_len, dest_len;
-       DIR *dir;
-
-       dir = opendir(src->buf);
-       if (!dir)
-               die_errno(_("failed to open '%s'"), src->buf);
-
-       if (mkdir(dest->buf, 0777)) {
-               if (errno != EEXIST)
-                       die_errno(_("failed to create directory '%s'"), dest->buf);
-               else if (stat(dest->buf, &buf))
-                       die_errno(_("failed to stat '%s'"), dest->buf);
-               else if (!S_ISDIR(buf.st_mode))
-                       die(_("%s exists and is not a directory"), dest->buf);
-       }
+       struct dir_iterator *iter;
+       int iter_status;
+       unsigned int flags;
+
+       mkdir_if_missing(dest->buf, 0777);
+
+       flags = DIR_ITERATOR_PEDANTIC | DIR_ITERATOR_FOLLOW_SYMLINKS;
+       iter = dir_iterator_begin(src->buf, flags);
+
+       if (!iter)
+               die_errno(_("failed to start iterator over '%s'"), src->buf);
 
        strbuf_addch(src, '/');
        src_len = src->len;
        strbuf_addch(dest, '/');
        dest_len = dest->len;
 
-       while ((de = readdir(dir)) != NULL) {
+       while ((iter_status = dir_iterator_advance(iter)) == ITER_OK) {
                strbuf_setlen(src, src_len);
-               strbuf_addstr(src, de->d_name);
+               strbuf_addstr(src, iter->relative_path);
                strbuf_setlen(dest, dest_len);
-               strbuf_addstr(dest, de->d_name);
-               if (stat(src->buf, &buf)) {
-                       warning (_("failed to stat %s\n"), src->buf);
-                       continue;
-               }
-               if (S_ISDIR(buf.st_mode)) {
-                       if (de->d_name[0] != '.')
-                               copy_or_link_directory(src, dest,
-                                                      src_repo, src_baselen);
+               strbuf_addstr(dest, iter->relative_path);
+
+               if (S_ISDIR(iter->st.st_mode)) {
+                       mkdir_if_missing(dest->buf, 0777);
                        continue;
                }
 
                /* Files that cannot be copied bit-for-bit... */
-               if (!strcmp(src->buf + src_baselen, "/info/alternates")) {
+               if (!fspathcmp(iter->relative_path, "info/alternates")) {
                        copy_alternates(src, src_repo);
                        continue;
                }
@@ -445,7 +452,7 @@ static void copy_or_link_directory(struct strbuf *src, struct strbuf *dest,
                if (unlink(dest->buf) && errno != ENOENT)
                        die_errno(_("failed to unlink '%s'"), dest->buf);
                if (!option_no_hardlinks) {
-                       if (!link(src->buf, dest->buf))
+                       if (!link(real_path(src->buf), dest->buf))
                                continue;
                        if (option_local > 0)
                                die_errno(_("failed to create link '%s'"), dest->buf);
@@ -454,7 +461,11 @@ static void copy_or_link_directory(struct strbuf *src, struct strbuf *dest,
                if (copy_file_with_time(dest->buf, src->buf, 0666))
                        die_errno(_("failed to copy file to '%s'"), dest->buf);
        }
-       closedir(dir);
+
+       if (iter_status != ITER_DONE) {
+               strbuf_setlen(src, src_len);
+               die(_("failed to iterate over '%s'"), src->buf);
+       }
 }
 
 static void clone_local(const char *src_repo, const char *dest_repo)
@@ -472,7 +483,7 @@ static void clone_local(const char *src_repo, const char *dest_repo)
                get_common_dir(&dest, dest_repo);
                strbuf_addstr(&src, "/objects");
                strbuf_addstr(&dest, "/objects");
-               copy_or_link_directory(&src, &dest, src_repo, src.len);
+               copy_or_link_directory(&src, &dest, src_repo);
                strbuf_release(&src);
                strbuf_release(&dest);
        }
index f2dcd82fde9723be1e53770ad25af42ec547869a..b17e9f970a747a3ac85aae81a92c38d2a6491546 100644 (file)
@@ -4,8 +4,6 @@
 #include "dir-iterator.h"
 
 struct dir_iterator_level {
-       int initialized;
-
        DIR *dir;
 
        /*
@@ -13,16 +11,6 @@ struct dir_iterator_level {
         * (including a trailing '/'):
         */
        size_t prefix_len;
-
-       /*
-        * The last action that has been taken with the current entry
-        * (needed for directories, which have to be included in the
-        * iteration and also iterated into):
-        */
-       enum {
-               DIR_STATE_ITER,
-               DIR_STATE_RECURSE
-       } dir_state;
 };
 
 /*
@@ -34,9 +22,11 @@ struct dir_iterator_int {
        struct dir_iterator base;
 
        /*
-        * The number of levels currently on the stack. This is always
-        * at least 1, because when it becomes zero the iteration is
-        * ended and this struct is freed.
+        * The number of levels currently on the stack. After the first
+        * call to dir_iterator_advance(), if it succeeds in opening the
+        * first level's dir, this will always be at least 1. Then,
+        * when it drops to zero the iteration is ended and this
+        * struct is freed.
         */
        size_t levels_nr;
 
@@ -48,117 +38,142 @@ struct dir_iterator_int {
         * that will be included in this iteration.
         */
        struct dir_iterator_level *levels;
+
+       /* Combination of flags for this dir-iterator */
+       unsigned int flags;
 };
 
+/*
+ * Push a level in the iter stack and initialize it with information from
+ * the directory pointed to by iter->base.path. It is assumed that this
+ * strbuf points to a valid directory path. Return 0 on success and -1
+ * otherwise, setting errno accordingly and leaving the stack unchanged.
+ */
+static int push_level(struct dir_iterator_int *iter)
+{
+       struct dir_iterator_level *level;
+
+       ALLOC_GROW(iter->levels, iter->levels_nr + 1, iter->levels_alloc);
+       level = &iter->levels[iter->levels_nr++];
+
+       if (!is_dir_sep(iter->base.path.buf[iter->base.path.len - 1]))
+               strbuf_addch(&iter->base.path, '/');
+       level->prefix_len = iter->base.path.len;
+
+       level->dir = opendir(iter->base.path.buf);
+       if (!level->dir) {
+               int saved_errno = errno;
+               if (errno != ENOENT) {
+                       warning_errno("error opening directory '%s'",
+                                     iter->base.path.buf);
+               }
+               iter->levels_nr--;
+               errno = saved_errno;
+               return -1;
+       }
+
+       return 0;
+}
+
+/*
+ * Pop the top level on the iter stack, releasing any resources associated
+ * with it. Return the new value of iter->levels_nr.
+ */
+static int pop_level(struct dir_iterator_int *iter)
+{
+       struct dir_iterator_level *level =
+               &iter->levels[iter->levels_nr - 1];
+
+       if (level->dir && closedir(level->dir))
+               warning_errno("error closing directory '%s'",
+                             iter->base.path.buf);
+       level->dir = NULL;
+
+       return --iter->levels_nr;
+}
+
+/*
+ * Populate iter->base with the necessary information on the next iteration
+ * entry, represented by the given dirent de. Return 0 on success and -1
+ * otherwise, setting errno accordingly.
+ */
+static int prepare_next_entry_data(struct dir_iterator_int *iter,
+                                  struct dirent *de)
+{
+       int err, saved_errno;
+
+       strbuf_addstr(&iter->base.path, de->d_name);
+       /*
+        * We have to reset these because the path strbuf might have
+        * been realloc()ed at the previous strbuf_addstr().
+        */
+       iter->base.relative_path = iter->base.path.buf +
+                                  iter->levels[0].prefix_len;
+       iter->base.basename = iter->base.path.buf +
+                             iter->levels[iter->levels_nr - 1].prefix_len;
+
+       if (iter->flags & DIR_ITERATOR_FOLLOW_SYMLINKS)
+               err = stat(iter->base.path.buf, &iter->base.st);
+       else
+               err = lstat(iter->base.path.buf, &iter->base.st);
+
+       saved_errno = errno;
+       if (err && errno != ENOENT)
+               warning_errno("failed to stat '%s'", iter->base.path.buf);
+
+       errno = saved_errno;
+       return err;
+}
+
 int dir_iterator_advance(struct dir_iterator *dir_iterator)
 {
        struct dir_iterator_int *iter =
                (struct dir_iterator_int *)dir_iterator;
 
+       if (S_ISDIR(iter->base.st.st_mode) && push_level(iter)) {
+               if (errno != ENOENT && iter->flags & DIR_ITERATOR_PEDANTIC)
+                       goto error_out;
+               if (iter->levels_nr == 0)
+                       goto error_out;
+       }
+
+       /* Loop until we find an entry that we can give back to the caller. */
        while (1) {
+               struct dirent *de;
                struct dir_iterator_level *level =
                        &iter->levels[iter->levels_nr - 1];
-               struct dirent *de;
 
-               if (!level->initialized) {
-                       /*
-                        * Note: dir_iterator_begin() ensures that
-                        * path is not the empty string.
-                        */
-                       if (!is_dir_sep(iter->base.path.buf[iter->base.path.len - 1]))
-                               strbuf_addch(&iter->base.path, '/');
-                       level->prefix_len = iter->base.path.len;
-
-                       level->dir = opendir(iter->base.path.buf);
-                       if (!level->dir && errno != ENOENT) {
-                               warning("error opening directory %s: %s",
-                                       iter->base.path.buf, strerror(errno));
-                               /* Popping the level is handled below */
-                       }
-
-                       level->initialized = 1;
-               } else if (S_ISDIR(iter->base.st.st_mode)) {
-                       if (level->dir_state == DIR_STATE_ITER) {
-                               /*
-                                * The directory was just iterated
-                                * over; now prepare to iterate into
-                                * it.
-                                */
-                               level->dir_state = DIR_STATE_RECURSE;
-                               ALLOC_GROW(iter->levels, iter->levels_nr + 1,
-                                          iter->levels_alloc);
-                               level = &iter->levels[iter->levels_nr++];
-                               level->initialized = 0;
-                               continue;
-                       } else {
-                               /*
-                                * The directory has already been
-                                * iterated over and iterated into;
-                                * we're done with it.
-                                */
+               strbuf_setlen(&iter->base.path, level->prefix_len);
+               errno = 0;
+               de = readdir(level->dir);
+
+               if (!de) {
+                       if (errno) {
+                               warning_errno("error reading directory '%s'",
+                                             iter->base.path.buf);
+                               if (iter->flags & DIR_ITERATOR_PEDANTIC)
+                                       goto error_out;
+                       } else if (pop_level(iter) == 0) {
+                               return dir_iterator_abort(dir_iterator);
                        }
+                       continue;
                }
 
-               if (!level->dir) {
-                       /*
-                        * This level is exhausted (or wasn't opened
-                        * successfully); pop up a level.
-                        */
-                       if (--iter->levels_nr == 0)
-                               return dir_iterator_abort(dir_iterator);
+               if (is_dot_or_dotdot(de->d_name))
+                       continue;
 
+               if (prepare_next_entry_data(iter, de)) {
+                       if (errno != ENOENT && iter->flags & DIR_ITERATOR_PEDANTIC)
+                               goto error_out;
                        continue;
                }
 
-               /*
-                * Loop until we find an entry that we can give back
-                * to the caller:
-                */
-               while (1) {
-                       strbuf_setlen(&iter->base.path, level->prefix_len);
-                       errno = 0;
-                       de = readdir(level->dir);
-
-                       if (!de) {
-                               /* This level is exhausted; pop up a level. */
-                               if (errno) {
-                                       warning("error reading directory %s: %s",
-                                               iter->base.path.buf, strerror(errno));
-                               } else if (closedir(level->dir))
-                                       warning("error closing directory %s: %s",
-                                               iter->base.path.buf, strerror(errno));
-
-                               level->dir = NULL;
-                               if (--iter->levels_nr == 0)
-                                       return dir_iterator_abort(dir_iterator);
-                               break;
-                       }
-
-                       if (is_dot_or_dotdot(de->d_name))
-                               continue;
-
-                       strbuf_addstr(&iter->base.path, de->d_name);
-                       if (lstat(iter->base.path.buf, &iter->base.st) < 0) {
-                               if (errno != ENOENT)
-                                       warning("error reading path '%s': %s",
-                                               iter->base.path.buf,
-                                               strerror(errno));
-                               continue;
-                       }
-
-                       /*
-                        * We have to set these each time because
-                        * the path strbuf might have been realloc()ed.
-                        */
-                       iter->base.relative_path =
-                               iter->base.path.buf + iter->levels[0].prefix_len;
-                       iter->base.basename =
-                               iter->base.path.buf + level->prefix_len;
-                       level->dir_state = DIR_STATE_ITER;
-
-                       return ITER_OK;
-               }
+               return ITER_OK;
        }
+
+error_out:
+       dir_iterator_abort(dir_iterator);
+       return ITER_ERROR;
 }
 
 int dir_iterator_abort(struct dir_iterator *dir_iterator)
@@ -170,9 +185,11 @@ int dir_iterator_abort(struct dir_iterator *dir_iterator)
                        &iter->levels[iter->levels_nr - 1];
 
                if (level->dir && closedir(level->dir)) {
+                       int saved_errno = errno;
                        strbuf_setlen(&iter->base.path, level->prefix_len);
-                       warning("error closing directory %s: %s",
-                               iter->base.path.buf, strerror(errno));
+                       errno = saved_errno;
+                       warning_errno("error closing directory '%s'",
+                                     iter->base.path.buf);
                }
        }
 
@@ -182,21 +199,37 @@ int dir_iterator_abort(struct dir_iterator *dir_iterator)
        return ITER_DONE;
 }
 
-struct dir_iterator *dir_iterator_begin(const char *path)
+struct dir_iterator *dir_iterator_begin(const char *path, unsigned int flags)
 {
        struct dir_iterator_int *iter = xcalloc(1, sizeof(*iter));
        struct dir_iterator *dir_iterator = &iter->base;
-
-       if (!path || !*path)
-               BUG("empty path passed to dir_iterator_begin()");
+       int saved_errno;
 
        strbuf_init(&iter->base.path, PATH_MAX);
        strbuf_addstr(&iter->base.path, path);
 
        ALLOC_GROW(iter->levels, 10, iter->levels_alloc);
+       iter->levels_nr = 0;
+       iter->flags = flags;
 
-       iter->levels_nr = 1;
-       iter->levels[0].initialized = 0;
+       /*
+        * Note: stat already checks for NULL or empty strings and
+        * inexistent paths.
+        */
+       if (stat(iter->base.path.buf, &iter->base.st) < 0) {
+               saved_errno = errno;
+               goto error_out;
+       }
+
+       if (!S_ISDIR(iter->base.st.st_mode)) {
+               saved_errno = ENOTDIR;
+               goto error_out;
+       }
 
        return dir_iterator;
+
+error_out:
+       dir_iterator_abort(dir_iterator);
+       errno = saved_errno;
+       return NULL;
 }
index 970793d07a1d72761159adca51d16859c7b72a29..08229157c638040cb19f7dd32b680fbea45f3965 100644 (file)
@@ -8,18 +8,23 @@
  *
  * Iterate over a directory tree, recursively, including paths of all
  * types and hidden paths. Skip "." and ".." entries and don't follow
- * symlinks except for the original path.
+ * symlinks except for the original path. Note that the original path
+ * is not included in the iteration.
  *
  * Every time dir_iterator_advance() is called, update the members of
  * the dir_iterator structure to reflect the next path in the
  * iteration. The order that paths are iterated over within a
- * directory is undefined, but directory paths are always iterated
- * over before the subdirectory contents.
+ * directory is undefined, but directory paths are always given before
+ * their contents.
  *
  * A typical iteration looks like this:
  *
  *     int ok;
- *     struct iterator *iter = dir_iterator_begin(path);
+ *     unsigned int flags = DIR_ITERATOR_PEDANTIC;
+ *     struct dir_iterator *iter = dir_iterator_begin(path, flags);
+ *
+ *     if (!iter)
+ *             goto error_handler;
  *
  *     while ((ok = dir_iterator_advance(iter)) == ITER_OK) {
  *             if (want_to_stop_iteration()) {
  * dir_iterator_advance() again.
  */
 
+/*
+ * Flags for dir_iterator_begin:
+ *
+ * - DIR_ITERATOR_PEDANTIC: override dir-iterator's default behavior
+ *   in case of an error at dir_iterator_advance(), which is to keep
+ *   looking for a next valid entry. With this flag, resources are freed
+ *   and ITER_ERROR is returned immediately. In both cases, a meaningful
+ *   warning is emitted. Note: ENOENT errors are always ignored so that
+ *   the API users may remove files during iteration.
+ *
+ * - DIR_ITERATOR_FOLLOW_SYMLINKS: make dir-iterator follow symlinks,
+ *   i.e., linked directories' contents will be iterated over and
+ *   iter->base.st will contain information on the referred files
+ *   rather than on the symlinks themselves (the default behavior).
+ *   Broken symlinks are ignored.
+ *
+ * Warning: circular symlinks are also followed when
+ * DIR_ITERATOR_FOLLOW_SYMLINKS is set. The iteration may end up with
+ * an ELOOP if they happen and DIR_ITERATOR_PEDANTIC is set.
+ */
+#define DIR_ITERATOR_PEDANTIC (1 << 0)
+#define DIR_ITERATOR_FOLLOW_SYMLINKS (1 << 1)
+
 struct dir_iterator {
        /* The current path: */
        struct strbuf path;
@@ -54,28 +82,38 @@ struct dir_iterator {
        /* The current basename: */
        const char *basename;
 
-       /* The result of calling lstat() on path: */
+       /*
+        * The result of calling lstat() on path; or stat(), if the
+        * DIR_ITERATOR_FOLLOW_SYMLINKS flag was set at
+        * dir_iterator's initialization.
+        */
        struct stat st;
 };
 
 /*
- * Start a directory iteration over path. Return a dir_iterator that
- * holds the internal state of the iteration.
+ * Start a directory iteration over path with the combination of
+ * options specified by flags. On success, return a dir_iterator
+ * that holds the internal state of the iteration. In case of
+ * failure, return NULL and set errno accordingly.
  *
  * The iteration includes all paths under path, not including path
  * itself and not including "." or ".." entries.
  *
- * path is the starting directory. An internal copy will be made.
+ * Parameters are:
+ *  - path is the starting directory. An internal copy will be made.
+ *  - flags is a combination of the possible flags to initialize a
+ *    dir-iterator or 0 for default behavior.
  */
-struct dir_iterator *dir_iterator_begin(const char *path);
+struct dir_iterator *dir_iterator_begin(const char *path, unsigned int flags);
 
 /*
  * Advance the iterator to the first or next item and return ITER_OK.
  * If the iteration is exhausted, free the dir_iterator and any
- * resources associated with it and return ITER_DONE. On error, free
- * dir_iterator and associated resources and return ITER_ERROR. It is
- * a bug to use iterator or call this function again after it has
- * returned ITER_DONE or ITER_ERROR.
+ * resources associated with it and return ITER_DONE.
+ *
+ * It is a bug to use iterator or call this function again after it
+ * has returned ITER_DONE or ITER_ERROR (which may be returned iff
+ * the DIR_ITERATOR_PEDANTIC flag was set).
  */
 int dir_iterator_advance(struct dir_iterator *iterator);
 
index 63e55e67730fcf01d8ca545e15b3c1e5b50c4b06..b1f8f53a098da3525fdff5011b93f39138df3df2 100644 (file)
@@ -2143,13 +2143,22 @@ static struct ref_iterator_vtable files_reflog_iterator_vtable = {
 static struct ref_iterator *reflog_iterator_begin(struct ref_store *ref_store,
                                                  const char *gitdir)
 {
-       struct files_reflog_iterator *iter = xcalloc(1, sizeof(*iter));
-       struct ref_iterator *ref_iterator = &iter->base;
+       struct dir_iterator *diter;
+       struct files_reflog_iterator *iter;
+       struct ref_iterator *ref_iterator;
        struct strbuf sb = STRBUF_INIT;
 
-       base_ref_iterator_init(ref_iterator, &files_reflog_iterator_vtable, 0);
        strbuf_addf(&sb, "%s/logs", gitdir);
-       iter->dir_iterator = dir_iterator_begin(sb.buf);
+
+       diter = dir_iterator_begin(sb.buf, 0);
+       if(!diter)
+               return empty_ref_iterator_begin();
+
+       iter = xcalloc(1, sizeof(*iter));
+       ref_iterator = &iter->base;
+
+       base_ref_iterator_init(ref_iterator, &files_reflog_iterator_vtable, 0);
+       iter->dir_iterator = diter;
        iter->ref_store = ref_store;
        strbuf_release(&sb);
 
diff --git a/t/helper/test-dir-iterator.c b/t/helper/test-dir-iterator.c
new file mode 100644 (file)
index 0000000..a5b96cb
--- /dev/null
@@ -0,0 +1,58 @@
+#include "test-tool.h"
+#include "git-compat-util.h"
+#include "strbuf.h"
+#include "iterator.h"
+#include "dir-iterator.h"
+
+/*
+ * usage:
+ * test-tool dir-iterator [--follow-symlinks] [--pedantic] directory_path
+ */
+int cmd__dir_iterator(int argc, const char **argv)
+{
+       struct strbuf path = STRBUF_INIT;
+       struct dir_iterator *diter;
+       unsigned int flags = 0;
+       int iter_status;
+
+       for (++argv, --argc; *argv && starts_with(*argv, "--"); ++argv, --argc) {
+               if (strcmp(*argv, "--follow-symlinks") == 0)
+                       flags |= DIR_ITERATOR_FOLLOW_SYMLINKS;
+               else if (strcmp(*argv, "--pedantic") == 0)
+                       flags |= DIR_ITERATOR_PEDANTIC;
+               else
+                       die("invalid option '%s'", *argv);
+       }
+
+       if (!*argv || argc != 1)
+               die("dir-iterator needs exactly one non-option argument");
+
+       strbuf_add(&path, *argv, strlen(*argv));
+       diter = dir_iterator_begin(path.buf, flags);
+
+       if (!diter) {
+               printf("dir_iterator_begin failure: %d\n", errno);
+               exit(EXIT_FAILURE);
+       }
+
+       while ((iter_status = dir_iterator_advance(diter)) == ITER_OK) {
+               if (S_ISDIR(diter->st.st_mode))
+                       printf("[d] ");
+               else if (S_ISREG(diter->st.st_mode))
+                       printf("[f] ");
+               else if (S_ISLNK(diter->st.st_mode))
+                       printf("[s] ");
+               else
+                       printf("[?] ");
+
+               printf("(%s) [%s] %s\n", diter->relative_path, diter->basename,
+                      diter->path.buf);
+       }
+
+       if (iter_status != ITER_DONE) {
+               printf("dir_iterator_advance failure\n");
+               return 1;
+       }
+
+       return 0;
+}
index 1eac25233f7ce62ecb00b2d1e3d06d1423c3581f..ce7e89028c4a7cc328998b0acfd42ec983cd7a74 100644 (file)
@@ -19,6 +19,7 @@ static struct test_cmd cmds[] = {
        { "ctype", cmd__ctype },
        { "date", cmd__date },
        { "delta", cmd__delta },
+       { "dir-iterator", cmd__dir_iterator },
        { "drop-caches", cmd__drop_caches },
        { "dump-cache-tree", cmd__dump_cache_tree },
        { "dump-fsmonitor", cmd__dump_fsmonitor },
index c7a46dc320e93b3bb5aef5f7fce3697c4558f814..f805bb39ae9e2ebe83ecb4e52f953fbcb43ce444 100644 (file)
@@ -9,6 +9,7 @@ int cmd__config(int argc, const char **argv);
 int cmd__ctype(int argc, const char **argv);
 int cmd__date(int argc, const char **argv);
 int cmd__delta(int argc, const char **argv);
+int cmd__dir_iterator(int argc, const char **argv);
 int cmd__drop_caches(int argc, const char **argv);
 int cmd__dump_cache_tree(int argc, const char **argv);
 int cmd__dump_fsmonitor(int argc, const char **argv);
diff --git a/t/t0066-dir-iterator.sh b/t/t0066-dir-iterator.sh
new file mode 100755 (executable)
index 0000000..9354d3f
--- /dev/null
@@ -0,0 +1,148 @@
+#!/bin/sh
+
+test_description='Test the dir-iterator functionality'
+
+. ./test-lib.sh
+
+test_expect_success 'setup' '
+       mkdir -p dir &&
+       mkdir -p dir/a/b/c/ &&
+       >dir/b &&
+       >dir/c &&
+       mkdir -p dir/d/e/d/ &&
+       >dir/a/b/c/d &&
+       >dir/a/e &&
+       >dir/d/e/d/a &&
+
+       mkdir -p dir2/a/b/c/ &&
+       >dir2/a/b/c/d
+'
+
+test_expect_success 'dir-iterator should iterate through all files' '
+       cat >expected-iteration-sorted-output <<-EOF &&
+       [d] (a) [a] ./dir/a
+       [d] (a/b) [b] ./dir/a/b
+       [d] (a/b/c) [c] ./dir/a/b/c
+       [d] (d) [d] ./dir/d
+       [d] (d/e) [e] ./dir/d/e
+       [d] (d/e/d) [d] ./dir/d/e/d
+       [f] (a/b/c/d) [d] ./dir/a/b/c/d
+       [f] (a/e) [e] ./dir/a/e
+       [f] (b) [b] ./dir/b
+       [f] (c) [c] ./dir/c
+       [f] (d/e/d/a) [a] ./dir/d/e/d/a
+       EOF
+
+       test-tool dir-iterator ./dir >out &&
+       sort out >./actual-iteration-sorted-output &&
+
+       test_cmp expected-iteration-sorted-output actual-iteration-sorted-output
+'
+
+test_expect_success 'dir-iterator should list files in the correct order' '
+       cat >expected-pre-order-output <<-EOF &&
+       [d] (a) [a] ./dir2/a
+       [d] (a/b) [b] ./dir2/a/b
+       [d] (a/b/c) [c] ./dir2/a/b/c
+       [f] (a/b/c/d) [d] ./dir2/a/b/c/d
+       EOF
+
+       test-tool dir-iterator ./dir2 >actual-pre-order-output &&
+
+       test_cmp expected-pre-order-output actual-pre-order-output
+'
+
+test_expect_success 'begin should fail upon inexistent paths' '
+       test_must_fail test-tool dir-iterator ./inexistent-path \
+               >actual-inexistent-path-output &&
+       echo "dir_iterator_begin failure: 2" >expected-inexistent-path-output &&
+       test_cmp expected-inexistent-path-output actual-inexistent-path-output
+'
+
+test_expect_success 'begin should fail upon non directory paths' '
+       test_must_fail test-tool dir-iterator ./dir/b >actual-non-dir-output &&
+       echo "dir_iterator_begin failure: 20" >expected-non-dir-output &&
+       test_cmp expected-non-dir-output actual-non-dir-output
+'
+
+test_expect_success POSIXPERM,SANITY 'advance should not fail on errors by default' '
+       cat >expected-no-permissions-output <<-EOF &&
+       [d] (a) [a] ./dir3/a
+       EOF
+
+       mkdir -p dir3/a &&
+       >dir3/a/b &&
+       chmod 0 dir3/a &&
+
+       test-tool dir-iterator ./dir3 >actual-no-permissions-output &&
+       test_cmp expected-no-permissions-output actual-no-permissions-output &&
+       chmod 755 dir3/a &&
+       rm -rf dir3
+'
+
+test_expect_success POSIXPERM,SANITY 'advance should fail on errors, w/ pedantic flag' '
+       cat >expected-no-permissions-pedantic-output <<-EOF &&
+       [d] (a) [a] ./dir3/a
+       dir_iterator_advance failure
+       EOF
+
+       mkdir -p dir3/a &&
+       >dir3/a/b &&
+       chmod 0 dir3/a &&
+
+       test_must_fail test-tool dir-iterator --pedantic ./dir3 \
+               >actual-no-permissions-pedantic-output &&
+       test_cmp expected-no-permissions-pedantic-output \
+               actual-no-permissions-pedantic-output &&
+       chmod 755 dir3/a &&
+       rm -rf dir3
+'
+
+test_expect_success SYMLINKS 'setup dirs with symlinks' '
+       mkdir -p dir4/a &&
+       mkdir -p dir4/b/c &&
+       >dir4/a/d &&
+       ln -s d dir4/a/e &&
+       ln -s ../b dir4/a/f &&
+
+       mkdir -p dir5/a/b &&
+       mkdir -p dir5/a/c &&
+       ln -s ../c dir5/a/b/d &&
+       ln -s ../ dir5/a/b/e &&
+       ln -s ../../ dir5/a/b/f
+'
+
+test_expect_success SYMLINKS 'dir-iterator should not follow symlinks by default' '
+       cat >expected-no-follow-sorted-output <<-EOF &&
+       [d] (a) [a] ./dir4/a
+       [d] (b) [b] ./dir4/b
+       [d] (b/c) [c] ./dir4/b/c
+       [f] (a/d) [d] ./dir4/a/d
+       [s] (a/e) [e] ./dir4/a/e
+       [s] (a/f) [f] ./dir4/a/f
+       EOF
+
+       test-tool dir-iterator ./dir4 >out &&
+       sort out >actual-no-follow-sorted-output &&
+
+       test_cmp expected-no-follow-sorted-output actual-no-follow-sorted-output
+'
+
+test_expect_success SYMLINKS 'dir-iterator should follow symlinks w/ follow flag' '
+       cat >expected-follow-sorted-output <<-EOF &&
+       [d] (a) [a] ./dir4/a
+       [d] (a/f) [f] ./dir4/a/f
+       [d] (a/f/c) [c] ./dir4/a/f/c
+       [d] (b) [b] ./dir4/b
+       [d] (b/c) [c] ./dir4/b/c
+       [f] (a/d) [d] ./dir4/a/d
+       [f] (a/e) [e] ./dir4/a/e
+       EOF
+
+       test-tool dir-iterator --follow-symlinks ./dir4 >out &&
+       sort out >actual-follow-sorted-output &&
+
+       test_cmp expected-follow-sorted-output actual-follow-sorted-output
+'
+
+test_done
index 4320082b1b8fe709e9462f84d15fec63c54a94d4..4894237ab8059c83aea8aae50a0803ebf9df6831 100755 (executable)
@@ -221,4 +221,137 @@ test_expect_success 'clone, dissociate from alternates' '
        ( cd C && git fsck )
 '
 
+# Create repository S with one commit, then drop empty files and
+# directories (hidden and non-hidden) that are not git objects into
+# S/.git/objects.
+test_expect_success 'setup repo with garbage in objects/*' '
+       git init S &&
+       (
+               cd S &&
+               test_commit A &&
+
+               cd .git/objects &&
+               mkdir .some-hidden-dir some-dir &&
+               for garbage in .some-hidden-file some-file \
+                       .some-hidden-dir/some-file \
+                       .some-hidden-dir/.some-dot-file \
+                       some-dir/some-file \
+                       some-dir/.some-dot-file
+               do
+                       >"$garbage" || exit 1
+               done
+       )
+'
+
+# Clone S with each local-clone option and run fsck on the result.  The
+# garbage entries are then expected under objects/ of the --dissociate,
+# --local and --no-hardlinks clones; no entry is expected for the --shared
+# clone, although the find pattern matches its directory too.
+test_expect_success 'clone a repo with garbage in objects/*' '
+       for option in --local --no-hardlinks --shared --dissociate
+       do
+               git clone $option S S$option &&
+               git -C S$option fsck ||
+               return 1
+       done &&
+       find S-* -name "*some*" | sort >actual &&
+
+       # Both loops list their items in the order "sort" emits them above.
+       for clone in S--dissociate S--local S--no-hardlinks
+       do
+               for garbage in .some-hidden-dir \
+                       .some-hidden-dir/.some-dot-file \
+                       .some-hidden-dir/some-file \
+                       .some-hidden-file \
+                       some-dir \
+                       some-dir/.some-dot-file \
+                       some-dir/some-file \
+                       some-file
+               do
+                       echo "$clone/.git/objects/$garbage" || return 1
+               done
+       done >expected &&
+       test_cmp expected actual
+'
+
+# Build repository T and then rearrange T/.git/objects by hand:
+#  - pack/ becomes a symlink to the renamed directory packs/
+#  - the last two-character directory becomes a symlink to a-loose-dir/
+#  - the object inside the first two-character directory becomes a symlink
+#    to ../an-object
+#  - a file with unknown content is added
+# The lists of regular files and symlinks are recorded in T.objects-*.raw,
+# and the reachable objects in T.objects, for the clone test that follows.
+test_expect_success SYMLINKS 'setup repo with manually symlinked or unknown files at objects/' '
+       git init T &&
+       (
+               cd T &&
+               git config gc.auto 0 &&
+               test_commit A &&
+               # NOTE(review): presumably gc here leaves the objects of A
+               # packed and the objects of B loose -- confirm
+               git gc &&
+               test_commit B &&
+
+               cd .git/objects &&
+               mv pack packs &&
+               ln -s packs pack &&
+
+               find ?? -type d >loose-dirs &&
+               head_dir=$(head -n 1 loose-dirs) &&
+               tail_dir=$(tail -n 1 loose-dirs) &&
+               rm -f loose-dirs &&
+
+               mv $tail_dir a-loose-dir &&
+               ln -s a-loose-dir $tail_dir &&
+
+               cd $head_dir &&
+               loose_obj=$(ls *) &&
+               mv $loose_obj ../an-object &&
+               ln -s ../an-object $loose_obj &&
+
+               cd .. &&
+               find . -type f | sort >../../../T.objects-files.raw &&
+               find . -type l | sort >../../../T.objects-symlinks.raw &&
+               echo unknown_content >unknown_file
+       ) &&
+       git -C T fsck &&
+       git -C T rev-list --all --objects >T.objects
+'
+
+
+
+# Clone T with every local-clone option and check each clone:
+#  - fsck passes and rev-list reports the same objects as in T
+#  - for --local, --no-hardlinks and --dissociate, objects/ holds the
+#    expected regular files and no symlinks at all
+#  - for --shared, objects/ holds only ./info/alternates and no symlinks
+# Before comparing, the sed pass rewrites the first two-character path
+# component to Y and any run of 38 or more hex digits to Z, and drops
+# lines that mention commit-graph or multi-pack-index.
+test_expect_success SYMLINKS 'clone repo with symlinked or unknown files at objects/' '
+       for option in --local --no-hardlinks --shared --dissociate
+       do
+               # A failing command does not stop a for loop by itself, so
+               # the whole chain must end in an explicit "return 1";
+               # otherwise a failure in any iteration but the last is lost.
+               git clone $option T T$option &&
+               git -C T$option fsck &&
+               git -C T$option rev-list --all --objects >T$option.objects &&
+               test_cmp T.objects T$option.objects &&
+               (
+                       cd T$option/.git/objects &&
+                       find . -type f | sort >../../../T$option.objects-files.raw &&
+                       find . -type l | sort >../../../T$option.objects-symlinks.raw
+               ) || return 1
+       done &&
+
+       for raw in T*.raw
+       do
+               sed -e "s!/../!/Y/!; s![0-9a-f]\{38,\}!Z!" -e "/commit-graph/d" \
+                   -e "/multi-pack-index/d" <$raw >$raw.de-sha || return 1
+       done &&
+
+       cat >expected-files <<-EOF &&
+       ./Y/Z
+       ./Y/Z
+       ./a-loose-dir/Z
+       ./an-object
+       ./Y/Z
+       ./info/packs
+       ./pack/pack-Z.idx
+       ./pack/pack-Z.pack
+       ./packs/pack-Z.idx
+       ./packs/pack-Z.pack
+       ./unknown_file
+       EOF
+
+       for option in --local --no-hardlinks --dissociate
+       do
+               test_cmp expected-files T$option.objects-files.raw.de-sha &&
+               test_must_be_empty T$option.objects-symlinks.raw.de-sha ||
+               return 1
+       done &&
+
+       echo ./info/alternates >expected-files &&
+       test_cmp expected-files T--shared.objects-files.raw &&
+       test_must_be_empty T--shared.objects-symlinks.raw
+'
+
 test_done