pack-write.c on commit write_idx_file: introduce a struct to hold idx customization options (ebcfb37)
   1#include "cache.h"
   2#include "pack.h"
   3#include "csum-file.h"
   4
   5void reset_pack_idx_option(struct pack_idx_option *opts)
   6{
   7        memset(opts, 0, sizeof(*opts));
   8        opts->version = 2;
   9        opts->off32_limit = 0x7fffffff;
  10}
  11
  12static int sha1_compare(const void *_a, const void *_b)
  13{
  14        struct pack_idx_entry *a = *(struct pack_idx_entry **)_a;
  15        struct pack_idx_entry *b = *(struct pack_idx_entry **)_b;
  16        return hashcmp(a->sha1, b->sha1);
  17}
  18
  19/*
  20 * On entry *sha1 contains the pack content SHA1 hash, on exit it is
  21 * the SHA1 hash of sorted object names. The objects array passed in
  22 * will be sorted by SHA1 on exit.
  23 */
  24const char *write_idx_file(const char *index_name, struct pack_idx_entry **objects,
  25                           int nr_objects, const struct pack_idx_option *opts,
  26                           unsigned char *sha1)
  27{
  28        struct sha1file *f;
  29        struct pack_idx_entry **sorted_by_sha, **list, **last;
  30        off_t last_obj_offset = 0;
  31        uint32_t array[256];
  32        int i, fd;
  33        git_SHA_CTX ctx;
  34        uint32_t index_version;
  35
  36        if (nr_objects) {
  37                sorted_by_sha = objects;
  38                list = sorted_by_sha;
  39                last = sorted_by_sha + nr_objects;
  40                for (i = 0; i < nr_objects; ++i) {
  41                        if (objects[i]->offset > last_obj_offset)
  42                                last_obj_offset = objects[i]->offset;
  43                }
  44                qsort(sorted_by_sha, nr_objects, sizeof(sorted_by_sha[0]),
  45                      sha1_compare);
  46        }
  47        else
  48                sorted_by_sha = list = last = NULL;
  49
  50        if (!index_name) {
  51                static char tmpfile[PATH_MAX];
  52                fd = odb_mkstemp(tmpfile, sizeof(tmpfile), "pack/tmp_idx_XXXXXX");
  53                index_name = xstrdup(tmpfile);
  54        } else {
  55                unlink(index_name);
  56                fd = open(index_name, O_CREAT|O_EXCL|O_WRONLY, 0600);
  57        }
  58        if (fd < 0)
  59                die_errno("unable to create '%s'", index_name);
  60        f = sha1fd(fd, index_name);
  61
  62        /* if last object's offset is >= 2^31 we should use index V2 */
  63        index_version = (last_obj_offset >> 31) ? 2 : opts->version;
  64
  65        /* index versions 2 and above need a header */
  66        if (index_version >= 2) {
  67                struct pack_idx_header hdr;
  68                hdr.idx_signature = htonl(PACK_IDX_SIGNATURE);
  69                hdr.idx_version = htonl(index_version);
  70                sha1write(f, &hdr, sizeof(hdr));
  71        }
  72
  73        /*
  74         * Write the first-level table (the list is sorted,
  75         * but we use a 256-entry lookup to be able to avoid
  76         * having to do eight extra binary search iterations).
  77         */
  78        for (i = 0; i < 256; i++) {
  79                struct pack_idx_entry **next = list;
  80                while (next < last) {
  81                        struct pack_idx_entry *obj = *next;
  82                        if (obj->sha1[0] != i)
  83                                break;
  84                        next++;
  85                }
  86                array[i] = htonl(next - sorted_by_sha);
  87                list = next;
  88        }
  89        sha1write(f, array, 256 * 4);
  90
  91        /* compute the SHA1 hash of sorted object names. */
  92        git_SHA1_Init(&ctx);
  93
  94        /*
  95         * Write the actual SHA1 entries..
  96         */
  97        list = sorted_by_sha;
  98        for (i = 0; i < nr_objects; i++) {
  99                struct pack_idx_entry *obj = *list++;
 100                if (index_version < 2) {
 101                        uint32_t offset = htonl(obj->offset);
 102                        sha1write(f, &offset, 4);
 103                }
 104                sha1write(f, obj->sha1, 20);
 105                git_SHA1_Update(&ctx, obj->sha1, 20);
 106        }
 107
 108        if (index_version >= 2) {
 109                unsigned int nr_large_offset = 0;
 110
 111                /* write the crc32 table */
 112                list = sorted_by_sha;
 113                for (i = 0; i < nr_objects; i++) {
 114                        struct pack_idx_entry *obj = *list++;
 115                        uint32_t crc32_val = htonl(obj->crc32);
 116                        sha1write(f, &crc32_val, 4);
 117                }
 118
 119                /* write the 32-bit offset table */
 120                list = sorted_by_sha;
 121                for (i = 0; i < nr_objects; i++) {
 122                        struct pack_idx_entry *obj = *list++;
 123                        uint32_t offset = (obj->offset <= opts->off32_limit) ?
 124                                obj->offset : (0x80000000 | nr_large_offset++);
 125                        offset = htonl(offset);
 126                        sha1write(f, &offset, 4);
 127                }
 128
 129                /* write the large offset table */
 130                list = sorted_by_sha;
 131                while (nr_large_offset) {
 132                        struct pack_idx_entry *obj = *list++;
 133                        uint64_t offset = obj->offset;
 134                        if (offset > opts->off32_limit) {
 135                                uint32_t split[2];
 136                                split[0] = htonl(offset >> 32);
 137                                split[1] = htonl(offset & 0xffffffff);
 138                                sha1write(f, split, 8);
 139                                nr_large_offset--;
 140                        }
 141                }
 142        }
 143
 144        sha1write(f, sha1, 20);
 145        sha1close(f, NULL, CSUM_FSYNC);
 146        git_SHA1_Final(sha1, &ctx);
 147        return index_name;
 148}
 149
 150/*
 151 * Update pack header with object_count and compute new SHA1 for pack data
 152 * associated to pack_fd, and write that SHA1 at the end.  That new SHA1
 153 * is also returned in new_pack_sha1.
 154 *
 155 * If partial_pack_sha1 is non null, then the SHA1 of the existing pack
 156 * (without the header update) is computed and validated against the
 157 * one provided in partial_pack_sha1.  The validation is performed at
 158 * partial_pack_offset bytes in the pack file.  The SHA1 of the remaining
 159 * data (i.e. from partial_pack_offset to the end) is then computed and
 160 * returned in partial_pack_sha1.
 161 *
 162 * Note that new_pack_sha1 is updated last, so both new_pack_sha1 and
 163 * partial_pack_sha1 can refer to the same buffer if the caller is not
 164 * interested in the resulting SHA1 of pack data above partial_pack_offset.
 165 */
 166void fixup_pack_header_footer(int pack_fd,
 167                         unsigned char *new_pack_sha1,
 168                         const char *pack_name,
 169                         uint32_t object_count,
 170                         unsigned char *partial_pack_sha1,
 171                         off_t partial_pack_offset)
 172{
 173        int aligned_sz, buf_sz = 8 * 1024;
 174        git_SHA_CTX old_sha1_ctx, new_sha1_ctx;
 175        struct pack_header hdr;
 176        char *buf;
 177
 178        git_SHA1_Init(&old_sha1_ctx);
 179        git_SHA1_Init(&new_sha1_ctx);
 180
 181        if (lseek(pack_fd, 0, SEEK_SET) != 0)
 182                die_errno("Failed seeking to start of '%s'", pack_name);
 183        if (read_in_full(pack_fd, &hdr, sizeof(hdr)) != sizeof(hdr))
 184                die_errno("Unable to reread header of '%s'", pack_name);
 185        if (lseek(pack_fd, 0, SEEK_SET) != 0)
 186                die_errno("Failed seeking to start of '%s'", pack_name);
 187        git_SHA1_Update(&old_sha1_ctx, &hdr, sizeof(hdr));
 188        hdr.hdr_entries = htonl(object_count);
 189        git_SHA1_Update(&new_sha1_ctx, &hdr, sizeof(hdr));
 190        write_or_die(pack_fd, &hdr, sizeof(hdr));
 191        partial_pack_offset -= sizeof(hdr);
 192
 193        buf = xmalloc(buf_sz);
 194        aligned_sz = buf_sz - sizeof(hdr);
 195        for (;;) {
 196                ssize_t m, n;
 197                m = (partial_pack_sha1 && partial_pack_offset < aligned_sz) ?
 198                        partial_pack_offset : aligned_sz;
 199                n = xread(pack_fd, buf, m);
 200                if (!n)
 201                        break;
 202                if (n < 0)
 203                        die_errno("Failed to checksum '%s'", pack_name);
 204                git_SHA1_Update(&new_sha1_ctx, buf, n);
 205
 206                aligned_sz -= n;
 207                if (!aligned_sz)
 208                        aligned_sz = buf_sz;
 209
 210                if (!partial_pack_sha1)
 211                        continue;
 212
 213                git_SHA1_Update(&old_sha1_ctx, buf, n);
 214                partial_pack_offset -= n;
 215                if (partial_pack_offset == 0) {
 216                        unsigned char sha1[20];
 217                        git_SHA1_Final(sha1, &old_sha1_ctx);
 218                        if (hashcmp(sha1, partial_pack_sha1) != 0)
 219                                die("Unexpected checksum for %s "
 220                                    "(disk corruption?)", pack_name);
 221                        /*
 222                         * Now let's compute the SHA1 of the remainder of the
 223                         * pack, which also means making partial_pack_offset
 224                         * big enough not to matter anymore.
 225                         */
 226                        git_SHA1_Init(&old_sha1_ctx);
 227                        partial_pack_offset = ~partial_pack_offset;
 228                        partial_pack_offset -= MSB(partial_pack_offset, 1);
 229                }
 230        }
 231        free(buf);
 232
 233        if (partial_pack_sha1)
 234                git_SHA1_Final(partial_pack_sha1, &old_sha1_ctx);
 235        git_SHA1_Final(new_pack_sha1, &new_sha1_ctx);
 236        write_or_die(pack_fd, new_pack_sha1, 20);
 237        fsync_or_die(pack_fd, pack_name);
 238}
 239
 240char *index_pack_lockfile(int ip_out)
 241{
 242        char packname[46];
 243
 244        /*
 245         * The first thing we expect from index-pack's output
 246         * is "pack\t%40s\n" or "keep\t%40s\n" (46 bytes) where
 247         * %40s is the newly created pack SHA1 name.  In the "keep"
 248         * case, we need it to remove the corresponding .keep file
 249         * later on.  If we don't get that then tough luck with it.
 250         */
 251        if (read_in_full(ip_out, packname, 46) == 46 && packname[45] == '\n' &&
 252            memcmp(packname, "keep\t", 5) == 0) {
 253                char path[PATH_MAX];
 254                packname[45] = 0;
 255                snprintf(path, sizeof(path), "%s/pack/pack-%s.keep",
 256                         get_object_directory(), packname + 5);
 257                return xstrdup(path);
 258        }
 259        return NULL;
 260}
 261
 262/*
 263 * The per-object header is a pretty dense thing, which is
 264 *  - first byte: low four bits are "size", then three bits of "type",
 265 *    and the high bit is "size continues".
 266 *  - each byte afterwards: low seven bits are size continuation,
 267 *    with the high bit being "size continues"
 268 */
 269int encode_in_pack_object_header(enum object_type type, uintmax_t size, unsigned char *hdr)
 270{
 271        int n = 1;
 272        unsigned char c;
 273
 274        if (type < OBJ_COMMIT || type > OBJ_REF_DELTA)
 275                die("bad type %d", type);
 276
 277        c = (type << 4) | (size & 15);
 278        size >>= 4;
 279        while (size) {
 280                *hdr++ = c | 0x80;
 281                c = size & 0x7f;
 282                size >>= 7;
 283                n++;
 284        }
 285        *hdr = c;
 286        return n;
 287}