bundle: dup() output descriptor closer to point-of-use
author Jeff King <peff@peff.net>
Fri, 16 Nov 2018 09:43:59 +0000 (04:43 -0500)
committer Junio C Hamano <gitster@pobox.com>
Sat, 17 Nov 2018 07:05:49 +0000 (16:05 +0900)
When writing a bundle to a file, the bundle code actually creates
"your.bundle.lock" using our lockfile interface. We feed that output
descriptor to a child git-pack-objects via run-command, which has the
quirk that it closes the output descriptor in the parent.

To avoid confusing the lockfile code (which still thinks the descriptor
is valid), we dup() it, and operate on the duplicate.

However, this has a confusing side effect: after the dup() but before we
call pack-objects, we have _two_ descriptors open to the lockfile. If we
call die() during that time, the lockfile code will try to clean up the
partially-written file. It knows to close() the file before unlinking,
since on some platforms (i.e., Windows) the open file would block the
deletion. But it doesn't know about the duplicate descriptor. On
Windows, triggering an error at the right part of the code will result
in the cleanup failing and the lockfile being left in the filesystem.

We can solve this by moving the dup() much closer to start_command(),
shrinking the window in which we have the second descriptor open. It's
easy to place this in such a way that no die() is possible. We could
still die due to a signal in the exact wrong moment, but we already
tolerate races there (e.g., a signal could come before we manage to put
the file on the cleanup list in the first place).

As a bonus, this shields create_bundle() itself from the duplicate-fd
trick, and we can simplify its error handling (note that the lock
rollback now happens unconditionally, but that's OK; it's a noop if we
didn't open the lock in the first place).

The included test uses an empty bundle to cause a failure at the right
spot in the code, because that's easy to trigger (the other likely
errors are write() problems like ENOSPC). Note that it would already
pass on non-Windows systems (because they are happy to unlink an
already-open file).

Based-on-a-patch-by: Gaël Lhez <gael.lhez@gmail.com>
Signed-off-by: Jeff King <peff@peff.net>
Tested-by: Johannes Schindelin <johannes.schindelin@gmx.de>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
bundle.c
t/t5607-clone-bundle.sh
index 24cbe409863a83e8bda453af637c2237504a531d..0ae8c2d796b284125189c3a809faabfa62fac50e 100644 (file)
--- a/bundle.c
+++ b/bundle.c
@@ -243,7 +243,7 @@ static int is_tag_in_date_range(struct object *tag, struct rev_info *revs)
 }
 
 
-/* Write the pack data to bundle_fd, then close it if it is > 1. */
+/* Write the pack data to bundle_fd */
 static int write_pack_data(int bundle_fd, struct rev_info *revs)
 {
        struct child_process pack_objects = CHILD_PROCESS_INIT;
@@ -256,6 +256,20 @@ static int write_pack_data(int bundle_fd, struct rev_info *revs)
        pack_objects.in = -1;
        pack_objects.out = bundle_fd;
        pack_objects.git_cmd = 1;
+
+       /*
+        * start_command() will close our descriptor if it's >1. Duplicate it
+        * to avoid surprising the caller.
+        */
+       if (pack_objects.out > 1) {
+               pack_objects.out = dup(pack_objects.out);
+               if (pack_objects.out < 0) {
+                       error_errno(_("unable to dup bundle descriptor"));
+                       child_process_clear(&pack_objects);
+                       return -1;
+               }
+       }
+
        if (start_command(&pack_objects))
                return error(_("Could not spawn pack-objects"));
 
@@ -421,21 +435,10 @@ int create_bundle(struct bundle_header *header, const char *path,
        bundle_to_stdout = !strcmp(path, "-");
        if (bundle_to_stdout)
                bundle_fd = 1;
-       else {
+       else
                bundle_fd = hold_lock_file_for_update(&lock, path,
                                                      LOCK_DIE_ON_ERROR);
 
-               /*
-                * write_pack_data() will close the fd passed to it,
-                * but commit_lock_file() will also try to close the
-                * lockfile's fd. So make a copy of the file
-                * descriptor to avoid trying to close it twice.
-                */
-               bundle_fd = dup(bundle_fd);
-               if (bundle_fd < 0)
-                       die_errno("unable to dup file descriptor");
-       }
-
        /* write signature */
        write_or_die(bundle_fd, bundle_signature, strlen(bundle_signature));
 
@@ -463,10 +466,8 @@ int create_bundle(struct bundle_header *header, const char *path,
                goto err;
 
        /* write pack */
-       if (write_pack_data(bundle_fd, &revs)) {
-               bundle_fd = -1; /* already closed by the above call */
+       if (write_pack_data(bundle_fd, &revs))
                goto err;
-       }
 
        if (!bundle_to_stdout) {
                if (commit_lock_file(&lock))
@@ -474,11 +475,7 @@ int create_bundle(struct bundle_header *header, const char *path,
        }
        return 0;
 err:
-       if (!bundle_to_stdout) {
-               if (0 <= bundle_fd)
-                       close(bundle_fd);
-               rollback_lock_file(&lock);
-       }
+       rollback_lock_file(&lock);
        return -1;
 }
 
index 348d9b3bc7ad3ea512f68b6200481c9f6b90d792..cf39e9e2437f06b4725b61ad2194dd65b4857f82 100755 (executable)
@@ -71,4 +71,10 @@ test_expect_success 'prerequisites with an empty commit message' '
        git bundle verify bundle
 '
 
+test_expect_success 'failed bundle creation does not leave cruft' '
+       # This fails because the bundle would be empty.
+       test_must_fail git bundle create fail.bundle master..master &&
+       test_path_is_missing fail.bundle.lock
+'
+
 test_done