commit-graph: expire commit-graph files
author: Derrick Stolee <dstolee@microsoft.com>
Tue, 18 Jun 2019 18:14:31 +0000 (11:14 -0700)
committer: Junio C Hamano <gitster@pobox.com>
Thu, 20 Jun 2019 03:46:26 +0000 (20:46 -0700)
As we merge commit-graph files in a commit-graph chain, we should clean
up the files that are no longer used.

This change introduces an 'expiry_window' value to the context, which is
always zero (for now). We then check the modified time of each
graph-{hash}.graph file in the $OBJDIR/info/commit-graphs folder and
unlink the files that are older than the expiry_window.

Since this is always zero, this immediately clears all unused graph
files. We will update the value to match a config setting in a future
change.

Signed-off-by: Derrick Stolee <dstolee@microsoft.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
Documentation/technical/commit-graph.txt
commit-graph.c
t/t5324-split-commit-graph.sh
index 473032e47696ec32bdff125afd16dc06fb563558..aed4350a5962286678ba6057b147706c2db5b982 100644 (file)
@@ -266,6 +266,21 @@ The merge strategy values (2 for the size multiple, 64,000 for the maximum
 number of commits) could be extracted into config settings for full
 flexibility.
 
 number of commits) could be extracted into config settings for full
 flexibility.
 
+## Deleting graph-{hash} files
+
+After a new tip file is written, some `graph-{hash}` files may no longer
+be part of a chain. It is important to remove these files from disk, eventually.
+The main reason to delay removal is that another process could read the
+`commit-graph-chain` file before it is rewritten, but then look for the
+`graph-{hash}` files after they are deleted.
+
+To allow holding old split commit-graphs for a while after they are unreferenced,
+we update the modified times of the files when they become unreferenced. Then,
+we scan the `$OBJDIR/info/commit-graphs/` directory for `graph-{hash}`
+files whose modified times are older than a given expiry window. This window
+defaults to zero, but can be changed using command-line arguments or a config
+setting.
+
 ## Chains across multiple object directories
 
 In a repo with alternates, we look for the `commit-graph-chain` file starting
 ## Chains across multiple object directories
 
 In a repo with alternates, we look for the `commit-graph-chain` file starting
index fba705bc51e507368b75d671edd15c7eaf0fc5a7..0cc2ceb349a2dec9164e50143f73425618b94acc 100644 (file)
@@ -1652,6 +1652,70 @@ static void merge_commit_graphs(struct write_commit_graph_context *ctx)
        sort_and_scan_merged_commits(ctx);
 }
 
        sort_and_scan_merged_commits(ctx);
 }
 
+/*
+ * Set the modification time of every "before" graph file at index
+ * num_commit_graphs_after - 1 and above to the current time, keeping
+ * each file's access time as it was.
+ *
+ * NOTE(review): these are presumably the layers that were merged into
+ * the new tip and are no longer referenced by the chain; confirm
+ * against the code that fills commit_graph_filenames_before.
+ *
+ * This is best-effort: a file that cannot be stat'ed is skipped and a
+ * failing utime() is ignored.
+ */
+static void mark_commit_graphs(struct write_commit_graph_context *ctx)
+{
+       uint32_t i;
+       time_t now = time(NULL);
+
+       for (i = ctx->num_commit_graphs_after - 1; i < ctx->num_commit_graphs_before; i++) {
+               const char *filename = ctx->commit_graph_filenames_before[i];
+               struct stat st;
+               struct utimbuf updated_time;
+
+               /*
+                * Without a successful stat() the contents of 'st' are
+                * indeterminate, so there is no access time to preserve.
+                */
+               if (stat(filename, &st) < 0)
+                       continue;
+
+               updated_time.actime = st.st_atime;
+               updated_time.modtime = now;
+               utime(filename, &updated_time);
+       }
+}
+
+/*
+ * Scan "<obj_dir>/info/commit-graphs" and unlink every entry that
+ *
+ *   - has a name ending in ".graph",
+ *   - has a modification time that is not later than the expiry time
+ *     (currently the time of this call), and
+ *   - is not listed in ctx->commit_graph_filenames_after.
+ *
+ * A missing or unreadable directory is not an error; there is simply
+ * nothing to expire. Entries that cannot be stat'ed are left alone.
+ */
+static void expire_commit_graphs(struct write_commit_graph_context *ctx)
+{
+       struct strbuf path = STRBUF_INIT;
+       DIR *dir;
+       struct dirent *de;
+       size_t dirnamelen;
+       time_t expire_time = time(NULL);
+
+       strbuf_addstr(&path, ctx->obj_dir);
+       strbuf_addstr(&path, "/info/commit-graphs");
+       dir = opendir(path.buf);
+
+       if (!dir)
+               goto out;
+
+       strbuf_addch(&path, '/');
+       dirnamelen = path.len;
+       while ((de = readdir(dir)) != NULL) {
+               struct stat st;
+               uint32_t i, found = 0;
+
+               /* Reuse the buffer: keep the directory prefix, swap the name. */
+               strbuf_setlen(&path, dirnamelen);
+               strbuf_addstr(&path, de->d_name);
+
+               /* Never decide to delete based on an unfilled 'st'. */
+               if (stat(path.buf, &st) < 0)
+                       continue;
+
+               if (st.st_mtime > expire_time)
+                       continue;
+               if (path.len < 6 || strcmp(path.buf + path.len - 6, ".graph"))
+                       continue;
+
+               for (i = 0; i < ctx->num_commit_graphs_after; i++) {
+                       if (!strcmp(ctx->commit_graph_filenames_after[i],
+                                   path.buf)) {
+                               found = 1;
+                               break;
+                       }
+               }
+
+               if (!found)
+                       unlink(path.buf);
+       }
+
+out:
+       /* Release the directory handle and the path buffer on every exit. */
+       if (dir)
+               closedir(dir);
+       strbuf_release(&path);
+}
+
 int write_commit_graph(const char *obj_dir,
                       struct string_list *pack_indexes,
                       struct string_list *commit_hex,
 int write_commit_graph(const char *obj_dir,
                       struct string_list *pack_indexes,
                       struct string_list *commit_hex,
@@ -1764,6 +1828,11 @@ int write_commit_graph(const char *obj_dir,
 
        res = write_commit_graph_file(ctx);
 
 
        res = write_commit_graph_file(ctx);
 
+       if (ctx->split) {
+               mark_commit_graphs(ctx);
+               expire_commit_graphs(ctx);
+       }
+
 cleanup:
        free(ctx->graph_name);
        free(ctx->commits.list);
 cleanup:
        free(ctx->graph_name);
        free(ctx->commits.list);
index 46f0832f6801009624b644b3de1bb89696e7710c..76068ee4077e6c2a4cfaf6ed24832b5f0f1f335f 100755 (executable)
@@ -141,7 +141,7 @@ test_expect_success 'add one commit, write a merged graph' '
        test_path_is_file $graphdir/commit-graph-chain &&
        test_line_count = 2 $graphdir/commit-graph-chain &&
        ls $graphdir/graph-*.graph >graph-files &&
        test_path_is_file $graphdir/commit-graph-chain &&
        test_line_count = 2 $graphdir/commit-graph-chain &&
        ls $graphdir/graph-*.graph >graph-files &&
-       test_line_count = 4 graph-files &&
+       test_line_count = 2 graph-files &&
        verify_chain_files_exist $graphdir
 '
 
        verify_chain_files_exist $graphdir
 '