Racy git: avoid having to be always too careful
authorJunio C Hamano <junkio@cox.net>
Sat, 5 Aug 2006 11:16:02 +0000 (04:16 -0700)
committerJunio C Hamano <junkio@cox.net>
Mon, 7 Aug 2006 08:58:53 +0000 (01:58 -0700)
Immediately after a bulk checkout, most of the paths in the
working tree would have the same timestamp as the index file,
and this would force ce_match_stat() to take slow path for all
of them. When writing an index file out, if many of the paths
have very new (read: the same timestamp as the index file being
written out) timestamp, we are better off delaying the return
from the command, to make sure that later command to touch the
working tree files will leave newer timestamps than recorded in
the index, thereby avoiding to take the slow path.

Signed-off-by: Junio C Hamano <junkio@cox.net>
read-cache.c
index f92cdaacee1b43bf09929e7072c281dbcbb09c10..ce76c20f34fb0f976fe6264918e2945bbe4a19d6 100644 (file)
@@ -5,6 +5,7 @@
  */
 #include "cache.h"
 #include "cache-tree.h"
+#include <time.h>
 
 /* Index extensions.
  *
@@ -923,7 +924,7 @@ static void ce_smudge_racily_clean_entry(struct cache_entry *ce)
                 * $ echo filfre >nitfol
                 * $ git-update-index --add nitfol
                 *
-                * but it does not.  Whe the second update-index runs,
+                * but it does not.  When the second update-index runs,
                 * it notices that the entry "frotz" has the same timestamp
                 * as index, and if we were to smudge it by resetting its
                 * size to zero here, then the object name recorded
@@ -945,7 +946,9 @@ int write_cache(int newfd, struct cache_entry **cache, int entries)
 {
        SHA_CTX c;
        struct cache_header hdr;
-       int i, removed;
+       int i, removed, recent;
+       struct stat st;
+       time_t now;
 
        for (i = removed = 0; i < entries; i++)
                if (!cache[i]->ce_mode)
@@ -959,15 +962,19 @@ int write_cache(int newfd, struct cache_entry **cache, int entries)
        if (ce_write(&c, newfd, &hdr, sizeof(hdr)) < 0)
                return -1;
 
+       now = fstat(newfd, &st) ? 0 : st.st_mtime;
+       recent = 0;
        for (i = 0; i < entries; i++) {
                struct cache_entry *ce = cache[i];
+               time_t entry_time = (time_t) ntohl(ce->ce_mtime.sec);
                if (!ce->ce_mode)
                        continue;
-               if (index_file_timestamp &&
-                   index_file_timestamp <= ntohl(ce->ce_mtime.sec))
+               if (index_file_timestamp && index_file_timestamp <= entry_time)
                        ce_smudge_racily_clean_entry(ce);
                if (ce_write(&c, newfd, ce, ce_size(ce)) < 0)
                        return -1;
+               if (now && now <= entry_time)
+                       recent++;
        }
 
        /* Write extension data here */
@@ -983,5 +990,34 @@ int write_cache(int newfd, struct cache_entry **cache, int entries)
                        return -1;
                }
        }
+
+       /*
+        * To prevent later ce_match_stat() from always falling into
+        * check_fs(), if we have too many entries that can trigger
+        * racily clean check, we are better off delaying the return.
+        * We arbitrarily say if more than 20 paths or 25% of total
+        * paths are very new, we delay the return until the index
+        * file gets a new timestamp.
+        *
+        * NOTE! NOTE! NOTE!
+        *
+        * This assumes that nobody is touching the working tree while
+        * we are updating the index.
+        */
+       if (20 < recent || entries <= recent * 4) {
+               now = fstat(newfd, &st) ? 0 : st.st_mtime;
+               while (now && !fstat(newfd, &st) && st.st_mtime <= now) {
+                       struct timespec rq, rm;
+                       off_t where = lseek(newfd, 0, SEEK_CUR);
+                       rq.tv_sec = 0;
+                       rq.tv_nsec = 250000000;
+                       nanosleep(&rq, &rm);
+                       if ((where == (off_t) -1) ||
+                           (write(newfd, "", 1) != 1) ||
+                           (lseek(newfd, -1, SEEK_CUR) != where) ||
+                           ftruncate(newfd, where))
+                               break;
+               }
+       }
        return ce_flush(&c, newfd);
 }