ref_transaction_commit(): fix atomicity and avoid fd exhaustion
authorMichael Haggerty <mhagger@alum.mit.edu>
Fri, 24 Apr 2015 11:35:49 +0000 (13:35 +0200)
committerJunio C Hamano <gitster@pobox.com>
Wed, 13 May 2015 04:28:03 +0000 (21:28 -0700)
The old code was roughly

for update in updates:
acquire locks and check old_sha
for update in updates:
if changing value:
write_ref_to_lockfile()
commit_ref_update()
for update in updates:
if deleting value:
unlink()
rewrite packed-refs file
for update in updates:
if reference still locked:
unlock_ref()

This has two problems.

Non-atomic updates
==================

The atomicity of the reference transaction depends on all pre-checks
being done in the first loop, before any changes have started being
committed in the second loop. The problem is that
write_ref_to_lockfile() (previously part of write_ref_sha1()), which
is called from the second loop, contains two more checks:

* It verifies that new_sha1 is a valid object

* If the reference being updated is a branch, it verifies that
new_sha1 points at a commit object (as opposed to a tag, tree, or
blob).

If either of these checks fails, the "transaction" is aborted during
the second loop. But this might happen after some reference updates
have already been permanently committed. In other words, the
all-or-nothing promise of "git update-ref --stdin" could be violated.

So these checks have to be moved to the first loop.

File descriptor exhaustion
==========================

The old code locked all of the references in the first loop, leaving
all of the lockfiles open until later loops. Since we might be
updating a lot of references, this could result in file descriptor
exhaustion.

The solution
============

After this patch, the code looks like

for update in updates:
acquire locks and check old_sha
if changing value:
write_ref_to_lockfile()
else:
close_ref()
for update in updates:
if changing value:
commit_ref_update()
for update in updates:
if deleting value:
unlink()
rewrite packed-refs file
for update in updates:
if reference still locked:
unlock_ref()

This fixes both problems:

1. The pre-checks in write_ref_to_lockfile() are now done in the first
loop, before any changes have been committed. If any of the checks
fails, the whole transaction can now be rolled back correctly.

2. All lockfiles are closed in the first loop immediately after they
are created (either by write_ref_to_lockfile() or by close_ref()).
This means that there is never more than one open lockfile at a
time, preventing file descriptor exhaustion.

To simplify the bookkeeping across loops, add a new REF_NEEDS_COMMIT
bit to update->flags, which keeps track of whether the corresponding
lockfile needs to be committed, as opposed to just unlocked. (Since
"struct ref_update" is internal to the refs module, this change is not
visible to external callers.)

This change fixes two tests in t1400.

Signed-off-by: Michael Haggerty <mhagger@alum.mit.edu>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
refs.c
t/t1400-update-ref.sh
diff --git a/refs.c b/refs.c
index 8dad0dfa01e6a785bfe664c396629a4a19005776..96f50467125dad661a79c60e13ecf5b389c4ef10 100644 (file)
--- a/refs.c
+++ b/refs.c
@@ -57,6 +57,12 @@ static unsigned char refname_disposition[256] = {
  */
 #define REF_HAVE_OLD   0x10
 
+/*
+ * Used as a flag in ref_update::flags when the lockfile needs to be
+ * committed.
+ */
+#define REF_NEEDS_COMMIT 0x20
+
 /*
  * Try to read one refname component from the front of refname.
  * Return the length of the component found, or -1 if the component is
@@ -3760,7 +3766,12 @@ int ref_transaction_commit(struct ref_transaction *transaction,
                goto cleanup;
        }
 
-       /* Acquire all locks while verifying old values */
+       /*
+        * Acquire all locks, verify old values if provided, check
+        * that new values are valid, and write new values to the
+        * lockfiles, ready to be activated. Only keep one lockfile
+        * open at a time to avoid running out of file descriptors.
+        */
        for (i = 0; i < n; i++) {
                struct ref_update *update = updates[i];
 
@@ -3782,38 +3793,60 @@ int ref_transaction_commit(struct ref_transaction *transaction,
                                    update->refname);
                        goto cleanup;
                }
-       }
-
-       /* Perform updates first so live commits remain referenced */
-       for (i = 0; i < n; i++) {
-               struct ref_update *update = updates[i];
-
-               if ((update->flags & REF_HAVE_NEW)
-                   && !is_null_sha1(update->new_sha1)) {
+               if ((update->flags & REF_HAVE_NEW) &&
+                   !(update->flags & REF_DELETING)) {
                        int overwriting_symref = ((update->type & REF_ISSYMREF) &&
                                                  (update->flags & REF_NODEREF));
 
-                       if (!overwriting_symref
-                           && !hashcmp(update->lock->old_sha1, update->new_sha1)) {
+                       if (!overwriting_symref &&
+                           !hashcmp(update->lock->old_sha1, update->new_sha1)) {
                                /*
                                 * The reference already has the desired
                                 * value, so we don't need to write it.
                                 */
-                               unlock_ref(update->lock);
-                               update->lock = NULL;
                        } else if (write_ref_to_lockfile(update->lock,
-                                                        update->new_sha1) ||
-                                  commit_ref_update(update->lock,
-                                                    update->new_sha1,
-                                                    update->msg)) {
-                               /* freed by one of the above calls: */
+                                                        update->new_sha1)) {
+                               /*
+                                * The lock was freed upon failure of
+                                * write_ref_to_lockfile():
+                                */
+                               update->lock = NULL;
+                               strbuf_addf(err, "Cannot update the ref '%s'.",
+                                           update->refname);
+                               ret = TRANSACTION_GENERIC_ERROR;
+                               goto cleanup;
+                       } else {
+                               update->flags |= REF_NEEDS_COMMIT;
+                       }
+               }
+               if (!(update->flags & REF_NEEDS_COMMIT)) {
+                       /*
+                        * We didn't have to write anything to the lockfile.
+                        * Close it to free up the file descriptor:
+                        */
+                       if (close_ref(update->lock)) {
+                               strbuf_addf(err, "Couldn't close %s.lock",
+                                           update->refname);
+                               goto cleanup;
+                       }
+               }
+       }
+
+       /* Perform updates first so live commits remain referenced */
+       for (i = 0; i < n; i++) {
+               struct ref_update *update = updates[i];
+
+               if (update->flags & REF_NEEDS_COMMIT) {
+                       if (commit_ref_update(update->lock,
+                                             update->new_sha1, update->msg)) {
+                               /* freed by commit_ref_update(): */
                                update->lock = NULL;
                                strbuf_addf(err, "Cannot update the ref '%s'.",
                                            update->refname);
                                ret = TRANSACTION_GENERIC_ERROR;
                                goto cleanup;
                        } else {
-                               /* freed by write_ref_sha1(): */
+                               /* freed by commit_ref_update(): */
                                update->lock = NULL;
                        }
                }
@@ -3823,8 +3856,7 @@ int ref_transaction_commit(struct ref_transaction *transaction,
        for (i = 0; i < n; i++) {
                struct ref_update *update = updates[i];
 
-               if ((update->flags & REF_HAVE_NEW)
-                   && is_null_sha1(update->new_sha1)) {
+               if (update->flags & REF_DELETING) {
                        if (delete_ref_loose(update->lock, update->type, err)) {
                                ret = TRANSACTION_GENERIC_ERROR;
                                goto cleanup;
index 7a69f1a0833677a77c0d37099843c5c17ee97fcc..636d3a167c4aa17bbb8f8b58d6a66142d90d1af2 100755 (executable)
@@ -1071,7 +1071,7 @@ run_with_limited_open_files () {
 
 test_lazy_prereq ULIMIT_FILE_DESCRIPTORS 'run_with_limited_open_files true'
 
-test_expect_failure ULIMIT_FILE_DESCRIPTORS 'large transaction creating branches does not burst open file limit' '
+test_expect_success ULIMIT_FILE_DESCRIPTORS 'large transaction creating branches does not burst open file limit' '
 (
        for i in $(test_seq 33)
        do
@@ -1082,7 +1082,7 @@ test_expect_failure ULIMIT_FILE_DESCRIPTORS 'large transaction creating branches
 )
 '
 
-test_expect_failure ULIMIT_FILE_DESCRIPTORS 'large transaction deleting branches does not burst open file limit' '
+test_expect_success ULIMIT_FILE_DESCRIPTORS 'large transaction deleting branches does not burst open file limit' '
 (
        for i in $(test_seq 33)
        do