pack-objects: document about thread synchronization
[gitweb.git] / builtin / pack-objects.c
index 53a0fb6ef2a8eec242359463cb5182973837c36f..567f5b41b8900e99c7887aea8a30fdc407605237 100644 (file)
@@ -279,6 +279,7 @@ static unsigned long write_no_reuse_object(struct hashfile *f, struct object_ent
        enum object_type type;
        void *buf;
        struct git_istream *st = NULL;
+       const unsigned hashsz = the_hash_algo->rawsz;
 
        if (!usable_delta) {
                if (oe_type(entry) == OBJ_BLOB &&
@@ -335,7 +336,7 @@ static unsigned long write_no_reuse_object(struct hashfile *f, struct object_ent
                dheader[pos] = ofs & 127;
                while (ofs >>= 7)
                        dheader[--pos] = 128 | (--ofs & 127);
-               if (limit && hdrlen + sizeof(dheader) - pos + datalen + 20 >= limit) {
+               if (limit && hdrlen + sizeof(dheader) - pos + datalen + hashsz >= limit) {
                        if (st)
                                close_istream(st);
                        free(buf);
@@ -347,19 +348,19 @@ static unsigned long write_no_reuse_object(struct hashfile *f, struct object_ent
        } else if (type == OBJ_REF_DELTA) {
                /*
                 * Deltas with a base reference contain
-                * an additional 20 bytes for the base sha1.
+                * additional bytes for the base object ID.
                 */
-               if (limit && hdrlen + 20 + datalen + 20 >= limit) {
+               if (limit && hdrlen + hashsz + datalen + hashsz >= limit) {
                        if (st)
                                close_istream(st);
                        free(buf);
                        return 0;
                }
                hashwrite(f, header, hdrlen);
-               hashwrite(f, DELTA(entry)->idx.oid.hash, 20);
-               hdrlen += 20;
+               hashwrite(f, DELTA(entry)->idx.oid.hash, hashsz);
+               hdrlen += hashsz;
        } else {
-               if (limit && hdrlen + datalen + 20 >= limit) {
+               if (limit && hdrlen + datalen + hashsz >= limit) {
                        if (st)
                                close_istream(st);
                        free(buf);
@@ -391,6 +392,7 @@ static off_t write_reuse_object(struct hashfile *f, struct object_entry *entry,
        unsigned char header[MAX_PACK_OBJECT_HEADER],
                      dheader[MAX_PACK_OBJECT_HEADER];
        unsigned hdrlen;
+       const unsigned hashsz = the_hash_algo->rawsz;
        unsigned long entry_size = SIZE(entry);
 
        if (DELTA(entry))
@@ -427,7 +429,7 @@ static off_t write_reuse_object(struct hashfile *f, struct object_entry *entry,
                dheader[pos] = ofs & 127;
                while (ofs >>= 7)
                        dheader[--pos] = 128 | (--ofs & 127);
-               if (limit && hdrlen + sizeof(dheader) - pos + datalen + 20 >= limit) {
+               if (limit && hdrlen + sizeof(dheader) - pos + datalen + hashsz >= limit) {
                        unuse_pack(&w_curs);
                        return 0;
                }
@@ -436,16 +438,16 @@ static off_t write_reuse_object(struct hashfile *f, struct object_entry *entry,
                hdrlen += sizeof(dheader) - pos;
                reused_delta++;
        } else if (type == OBJ_REF_DELTA) {
-               if (limit && hdrlen + 20 + datalen + 20 >= limit) {
+               if (limit && hdrlen + hashsz + datalen + hashsz >= limit) {
                        unuse_pack(&w_curs);
                        return 0;
                }
                hashwrite(f, header, hdrlen);
-               hashwrite(f, DELTA(entry)->idx.oid.hash, 20);
-               hdrlen += 20;
+               hashwrite(f, DELTA(entry)->idx.oid.hash, hashsz);
+               hdrlen += hashsz;
                reused_delta++;
        } else {
-               if (limit && hdrlen + datalen + 20 >= limit) {
+               if (limit && hdrlen + datalen + hashsz >= limit) {
                        unuse_pack(&w_curs);
                        return 0;
                }
@@ -769,7 +771,7 @@ static off_t write_reused_pack(struct hashfile *f)
                die_errno("unable to seek in reused packfile");
 
        if (reuse_packfile_offset < 0)
-               reuse_packfile_offset = reuse_packfile->pack_size - 20;
+               reuse_packfile_offset = reuse_packfile->pack_size - the_hash_algo->rawsz;
 
        total = to_write = reuse_packfile_offset - sizeof(struct pack_header);
 
@@ -1033,7 +1035,7 @@ static int want_object_in_pack(const struct object_id *oid,
        int want;
        struct list_head *pos;
 
-       if (!exclude && local && has_loose_object_nonlocal(oid->hash))
+       if (!exclude && local && has_loose_object_nonlocal(oid))
                return 0;
 
        /*
@@ -1467,7 +1469,7 @@ static void check_object(struct object_entry *entry)
                        if (reuse_delta && !entry->preferred_base)
                                base_ref = use_pack(p, &w_curs,
                                                entry->in_pack_offset + used, NULL);
-                       entry->in_pack_header_size = used + 20;
+                       entry->in_pack_header_size = used + the_hash_algo->rawsz;
                        break;
                case OBJ_OFS_DELTA:
                        buf = use_pack(p, &w_curs,
@@ -1850,18 +1852,30 @@ static int delta_cacheable(unsigned long src_size, unsigned long trg_size,
 
 #ifndef NO_PTHREADS
 
+/* Protect access to object database */
 static pthread_mutex_t read_mutex;
 #define read_lock()            pthread_mutex_lock(&read_mutex)
 #define read_unlock()          pthread_mutex_unlock(&read_mutex)
 
+/* Protect delta_cache_size */
 static pthread_mutex_t cache_mutex;
 #define cache_lock()           pthread_mutex_lock(&cache_mutex)
 #define cache_unlock()         pthread_mutex_unlock(&cache_mutex)
 
+/*
+ * Protect object list partitioning (e.g. struct thread_param) and
+ * progress_state
+ */
 static pthread_mutex_t progress_mutex;
 #define progress_lock()                pthread_mutex_lock(&progress_mutex)
 #define progress_unlock()      pthread_mutex_unlock(&progress_mutex)
 
+/*
+ * Access to struct object_entry is unprotected since each thread owns
+ * a portion of the main object list. Just don't access object entries
+ * ahead in the list because they can be stolen and would need
+ * progress_mutex for protection.
+ */
 #else
 
 #define read_lock()            (void)0
@@ -1949,7 +1963,7 @@ static int try_delta(struct unpacked *trg, struct unpacked *src,
        /* Now some size filtering heuristics. */
        trg_size = SIZE(trg_entry);
        if (!DELTA(trg_entry)) {
-               max_size = trg_size/2 - 20;
+               max_size = trg_size/2 - the_hash_algo->rawsz;
                ref_depth = 1;
        } else {
                max_size = DELTA_SIZE(trg_entry);
@@ -2243,12 +2257,19 @@ static void try_to_free_from_threads(size_t size)
 static try_to_free_t old_try_to_free_routine;
 
 /*
+ * The main object list is split into smaller lists, each is handed to
+ * one worker.
+ *
  * The main thread waits on the condition that (at least) one of the workers
  * has stopped working (which is indicated in the .working member of
  * struct thread_params).
+ *
  * When a work thread has completed its work, it sets .working to 0 and
  * signals the main thread and waits on the condition that .data_ready
  * becomes 1.
+ *
+ * The main thread steals half of the work from the worker that has
+ * most work left to hand it to the idle worker.
  */
 
 struct thread_params {
@@ -3132,18 +3153,18 @@ int cmd_pack_objects(int argc, const char **argv, const char *prefix)
                         N_("do not create an empty pack output")),
                OPT_BOOL(0, "revs", &use_internal_rev_list,
                         N_("read revision arguments from standard input")),
-               { OPTION_SET_INT, 0, "unpacked", &rev_list_unpacked, NULL,
-                 N_("limit the objects to those that are not yet packed"),
-                 PARSE_OPT_NOARG | PARSE_OPT_NONEG, NULL, 1 },
-               { OPTION_SET_INT, 0, "all", &rev_list_all, NULL,
-                 N_("include objects reachable from any reference"),
-                 PARSE_OPT_NOARG | PARSE_OPT_NONEG, NULL, 1 },
-               { OPTION_SET_INT, 0, "reflog", &rev_list_reflog, NULL,
-                 N_("include objects referred by reflog entries"),
-                 PARSE_OPT_NOARG | PARSE_OPT_NONEG, NULL, 1 },
-               { OPTION_SET_INT, 0, "indexed-objects", &rev_list_index, NULL,
-                 N_("include objects referred to by the index"),
-                 PARSE_OPT_NOARG | PARSE_OPT_NONEG, NULL, 1 },
+               OPT_SET_INT_F(0, "unpacked", &rev_list_unpacked,
+                             N_("limit the objects to those that are not yet packed"),
+                             1, PARSE_OPT_NONEG),
+               OPT_SET_INT_F(0, "all", &rev_list_all,
+                             N_("include objects reachable from any reference"),
+                             1, PARSE_OPT_NONEG),
+               OPT_SET_INT_F(0, "reflog", &rev_list_reflog,
+                             N_("include objects referred by reflog entries"),
+                             1, PARSE_OPT_NONEG),
+               OPT_SET_INT_F(0, "indexed-objects", &rev_list_index,
+                             N_("include objects referred to by the index"),
+                             1, PARSE_OPT_NONEG),
                OPT_BOOL(0, "stdout", &pack_to_stdout,
                         N_("output pack to stdout")),
                OPT_BOOL(0, "include-tag", &include_tag,