delta-islands.con commit repack: add delta-islands support (16d75fa)
   1#include "cache.h"
   2#include "attr.h"
   3#include "object.h"
   4#include "blob.h"
   5#include "commit.h"
   6#include "tag.h"
   7#include "tree.h"
   8#include "delta.h"
   9#include "pack.h"
  10#include "tree-walk.h"
  11#include "diff.h"
  12#include "revision.h"
  13#include "list-objects.h"
  14#include "progress.h"
  15#include "refs.h"
  16#include "khash.h"
  17#include "pack-bitmap.h"
  18#include "pack-objects.h"
  19#include "delta-islands.h"
  20#include "sha1-array.h"
  21#include "config.h"
  22
  23KHASH_INIT(str, const char *, void *, 1, kh_str_hash_func, kh_str_hash_equal)
  24
  25static khash_sha1 *island_marks;
  26static unsigned island_counter;
  27static unsigned island_counter_core;
  28
  29static kh_str_t *remote_islands;
  30
  31struct remote_island {
  32        uint64_t hash;
  33        struct oid_array oids;
  34};
  35
  36struct island_bitmap {
  37        uint32_t refcount;
  38        uint32_t bits[FLEX_ARRAY];
  39};
  40
  41static uint32_t island_bitmap_size;
  42
  43/*
  44 * Allocate a new bitmap; if "old" is not NULL, the new bitmap will be a copy
  45 * of "old". Otherwise, the new bitmap is empty.
  46 */
  47static struct island_bitmap *island_bitmap_new(const struct island_bitmap *old)
  48{
  49        size_t size = sizeof(struct island_bitmap) + (island_bitmap_size * 4);
  50        struct island_bitmap *b = xcalloc(1, size);
  51
  52        if (old)
  53                memcpy(b, old, size);
  54
  55        b->refcount = 1;
  56        return b;
  57}
  58
  59static void island_bitmap_or(struct island_bitmap *a, const struct island_bitmap *b)
  60{
  61        uint32_t i;
  62
  63        for (i = 0; i < island_bitmap_size; ++i)
  64                a->bits[i] |= b->bits[i];
  65}
  66
  67static int island_bitmap_is_subset(struct island_bitmap *self,
  68                struct island_bitmap *super)
  69{
  70        uint32_t i;
  71
  72        if (self == super)
  73                return 1;
  74
  75        for (i = 0; i < island_bitmap_size; ++i) {
  76                if ((self->bits[i] & super->bits[i]) != self->bits[i])
  77                        return 0;
  78        }
  79
  80        return 1;
  81}
  82
  83#define ISLAND_BITMAP_BLOCK(x) (x / 32)
  84#define ISLAND_BITMAP_MASK(x) (1 << (x % 32))
  85
  86static void island_bitmap_set(struct island_bitmap *self, uint32_t i)
  87{
  88        self->bits[ISLAND_BITMAP_BLOCK(i)] |= ISLAND_BITMAP_MASK(i);
  89}
  90
  91static int island_bitmap_get(struct island_bitmap *self, uint32_t i)
  92{
  93        return (self->bits[ISLAND_BITMAP_BLOCK(i)] & ISLAND_BITMAP_MASK(i)) != 0;
  94}
  95
  96int in_same_island(const struct object_id *trg_oid, const struct object_id *src_oid)
  97{
  98        khiter_t trg_pos, src_pos;
  99
 100        /* If we aren't using islands, assume everything goes together. */
 101        if (!island_marks)
 102                return 1;
 103
 104        /*
 105         * If we don't have a bitmap for the target, we can delta it
 106         * against anything -- it's not an important object
 107         */
 108        trg_pos = kh_get_sha1(island_marks, trg_oid->hash);
 109        if (trg_pos >= kh_end(island_marks))
 110                return 1;
 111
 112        /*
 113         * if the source (our delta base) doesn't have a bitmap,
 114         * we don't want to base any deltas on it!
 115         */
 116        src_pos = kh_get_sha1(island_marks, src_oid->hash);
 117        if (src_pos >= kh_end(island_marks))
 118                return 0;
 119
 120        return island_bitmap_is_subset(kh_value(island_marks, trg_pos),
 121                                kh_value(island_marks, src_pos));
 122}
 123
 124int island_delta_cmp(const struct object_id *a, const struct object_id *b)
 125{
 126        khiter_t a_pos, b_pos;
 127        struct island_bitmap *a_bitmap = NULL, *b_bitmap = NULL;
 128
 129        if (!island_marks)
 130                return 0;
 131
 132        a_pos = kh_get_sha1(island_marks, a->hash);
 133        if (a_pos < kh_end(island_marks))
 134                a_bitmap = kh_value(island_marks, a_pos);
 135
 136        b_pos = kh_get_sha1(island_marks, b->hash);
 137        if (b_pos < kh_end(island_marks))
 138                b_bitmap = kh_value(island_marks, b_pos);
 139
 140        if (a_bitmap) {
 141                if (!b_bitmap || !island_bitmap_is_subset(a_bitmap, b_bitmap))
 142                        return -1;
 143        }
 144        if (b_bitmap) {
 145                if (!a_bitmap || !island_bitmap_is_subset(b_bitmap, a_bitmap))
 146                        return 1;
 147        }
 148
 149        return 0;
 150}
 151
 152static struct island_bitmap *create_or_get_island_marks(struct object *obj)
 153{
 154        khiter_t pos;
 155        int hash_ret;
 156
 157        pos = kh_put_sha1(island_marks, obj->oid.hash, &hash_ret);
 158        if (hash_ret)
 159                kh_value(island_marks, pos) = island_bitmap_new(NULL);
 160
 161        return kh_value(island_marks, pos);
 162}
 163
 164static void set_island_marks(struct object *obj, struct island_bitmap *marks)
 165{
 166        struct island_bitmap *b;
 167        khiter_t pos;
 168        int hash_ret;
 169
 170        pos = kh_put_sha1(island_marks, obj->oid.hash, &hash_ret);
 171        if (hash_ret) {
 172                /*
 173                 * We don't have one yet; make a copy-on-write of the
 174                 * parent.
 175                 */
 176                marks->refcount++;
 177                kh_value(island_marks, pos) = marks;
 178                return;
 179        }
 180
 181        /*
 182         * We do have it. Make sure we split any copy-on-write before
 183         * updating.
 184         */
 185        b = kh_value(island_marks, pos);
 186        if (b->refcount > 1) {
 187                b->refcount--;
 188                b = kh_value(island_marks, pos) = island_bitmap_new(b);
 189        }
 190        island_bitmap_or(b, marks);
 191}
 192
 193static void mark_remote_island_1(struct remote_island *rl, int is_core_island)
 194{
 195        uint32_t i;
 196
 197        for (i = 0; i < rl->oids.nr; ++i) {
 198                struct island_bitmap *marks;
 199                struct object *obj = parse_object(the_repository, &rl->oids.oid[i]);
 200
 201                if (!obj)
 202                        continue;
 203
 204                marks = create_or_get_island_marks(obj);
 205                island_bitmap_set(marks, island_counter);
 206
 207                if (is_core_island && obj->type == OBJ_COMMIT)
 208                        obj->flags |= NEEDS_BITMAP;
 209
 210                /* If it was a tag, also make sure we hit the underlying object. */
 211                while (obj && obj->type == OBJ_TAG) {
 212                        obj = ((struct tag *)obj)->tagged;
 213                        if (obj) {
 214                                parse_object(the_repository, &obj->oid);
 215                                marks = create_or_get_island_marks(obj);
 216                                island_bitmap_set(marks, island_counter);
 217                        }
 218                }
 219        }
 220
 221        if (is_core_island)
 222                island_counter_core = island_counter;
 223
 224        island_counter++;
 225}
 226
 227static int cmp_tree_depth(const void *va, const void *vb)
 228{
 229        struct object_entry *a = *(struct object_entry **)va;
 230        struct object_entry *b = *(struct object_entry **)vb;
 231        return a->tree_depth - b->tree_depth;
 232}
 233
 234void resolve_tree_islands(int progress, struct packing_data *to_pack)
 235{
 236        struct progress *progress_state = NULL;
 237        struct object_entry **todo;
 238        int nr = 0;
 239        int i;
 240
 241        if (!island_marks)
 242                return;
 243
 244        /*
 245         * We process only trees, as commits and tags have already been handled
 246         * (and passed their marks on to root trees, as well. We must make sure
 247         * to process them in descending tree-depth order so that marks
 248         * propagate down the tree properly, even if a sub-tree is found in
 249         * multiple parent trees.
 250         */
 251        ALLOC_ARRAY(todo, to_pack->nr_objects);
 252        for (i = 0; i < to_pack->nr_objects; i++) {
 253                if (oe_type(&to_pack->objects[i]) == OBJ_TREE)
 254                        todo[nr++] = &to_pack->objects[i];
 255        }
 256        QSORT(todo, nr, cmp_tree_depth);
 257
 258        if (progress)
 259                progress_state = start_progress(_("Propagating island marks"), nr);
 260
 261        for (i = 0; i < nr; i++) {
 262                struct object_entry *ent = todo[i];
 263                struct island_bitmap *root_marks;
 264                struct tree *tree;
 265                struct tree_desc desc;
 266                struct name_entry entry;
 267                khiter_t pos;
 268
 269                pos = kh_get_sha1(island_marks, ent->idx.oid.hash);
 270                if (pos >= kh_end(island_marks))
 271                        continue;
 272
 273                root_marks = kh_value(island_marks, pos);
 274
 275                tree = lookup_tree(the_repository, &ent->idx.oid);
 276                if (!tree || parse_tree(tree) < 0)
 277                        die(_("bad tree object %s"), oid_to_hex(&ent->idx.oid));
 278
 279                init_tree_desc(&desc, tree->buffer, tree->size);
 280                while (tree_entry(&desc, &entry)) {
 281                        struct object *obj;
 282
 283                        if (S_ISGITLINK(entry.mode))
 284                                continue;
 285
 286                        obj = lookup_object(the_repository, entry.oid->hash);
 287                        if (!obj)
 288                                continue;
 289
 290                        set_island_marks(obj, root_marks);
 291                }
 292
 293                free_tree_buffer(tree);
 294
 295                display_progress(progress_state, i+1);
 296        }
 297
 298        stop_progress(&progress_state);
 299        free(todo);
 300}
 301
 302static regex_t *island_regexes;
 303static unsigned int island_regexes_alloc, island_regexes_nr;
 304static const char *core_island_name;
 305
 306static int island_config_callback(const char *k, const char *v, void *cb)
 307{
 308        if (!strcmp(k, "pack.island")) {
 309                struct strbuf re = STRBUF_INIT;
 310
 311                if (!v)
 312                        return config_error_nonbool(k);
 313
 314                ALLOC_GROW(island_regexes, island_regexes_nr + 1, island_regexes_alloc);
 315
 316                if (*v != '^')
 317                        strbuf_addch(&re, '^');
 318                strbuf_addstr(&re, v);
 319
 320                if (regcomp(&island_regexes[island_regexes_nr], re.buf, REG_EXTENDED))
 321                        die(_("failed to load island regex for '%s': %s"), k, re.buf);
 322
 323                strbuf_release(&re);
 324                island_regexes_nr++;
 325                return 0;
 326        }
 327
 328        if (!strcmp(k, "pack.islandcore"))
 329                return git_config_string(&core_island_name, k, v);
 330
 331        return 0;
 332}
 333
 334static void add_ref_to_island(const char *island_name, const struct object_id *oid)
 335{
 336        uint64_t sha_core;
 337        struct remote_island *rl = NULL;
 338
 339        int hash_ret;
 340        khiter_t pos = kh_put_str(remote_islands, island_name, &hash_ret);
 341
 342        if (hash_ret) {
 343                kh_key(remote_islands, pos) = xstrdup(island_name);
 344                kh_value(remote_islands, pos) = xcalloc(1, sizeof(struct remote_island));
 345        }
 346
 347        rl = kh_value(remote_islands, pos);
 348        oid_array_append(&rl->oids, oid);
 349
 350        memcpy(&sha_core, oid->hash, sizeof(uint64_t));
 351        rl->hash += sha_core;
 352}
 353
 354static int find_island_for_ref(const char *refname, const struct object_id *oid,
 355                               int flags, void *data)
 356{
 357        /*
 358         * We should advertise 'ARRAY_SIZE(matches) - 2' as the max,
 359         * so we can diagnose below a config with more capture groups
 360         * than we support.
 361         */
 362        regmatch_t matches[16];
 363        int i, m;
 364        struct strbuf island_name = STRBUF_INIT;
 365
 366        /* walk backwards to get last-one-wins ordering */
 367        for (i = island_regexes_nr - 1; i >= 0; i--) {
 368                if (!regexec(&island_regexes[i], refname,
 369                             ARRAY_SIZE(matches), matches, 0))
 370                        break;
 371        }
 372
 373        if (i < 0)
 374                return 0;
 375
 376        if (matches[ARRAY_SIZE(matches) - 1].rm_so != -1)
 377                warning(_("island regex from config has "
 378                          "too many capture groups (max=%d)"),
 379                        (int)ARRAY_SIZE(matches) - 2);
 380
 381        for (m = 1; m < ARRAY_SIZE(matches); m++) {
 382                regmatch_t *match = &matches[m];
 383
 384                if (match->rm_so == -1)
 385                        continue;
 386
 387                if (island_name.len)
 388                        strbuf_addch(&island_name, '-');
 389
 390                strbuf_add(&island_name, refname + match->rm_so, match->rm_eo - match->rm_so);
 391        }
 392
 393        add_ref_to_island(island_name.buf, oid);
 394        strbuf_release(&island_name);
 395        return 0;
 396}
 397
 398static struct remote_island *get_core_island(void)
 399{
 400        if (core_island_name) {
 401                khiter_t pos = kh_get_str(remote_islands, core_island_name);
 402                if (pos < kh_end(remote_islands))
 403                        return kh_value(remote_islands, pos);
 404        }
 405
 406        return NULL;
 407}
 408
 409static void deduplicate_islands(void)
 410{
 411        struct remote_island *island, *core = NULL, **list;
 412        unsigned int island_count, dst, src, ref, i = 0;
 413
 414        island_count = kh_size(remote_islands);
 415        ALLOC_ARRAY(list, island_count);
 416
 417        kh_foreach_value(remote_islands, island, {
 418                list[i++] = island;
 419        });
 420
 421        for (ref = 0; ref + 1 < island_count; ref++) {
 422                for (src = ref + 1, dst = src; src < island_count; src++) {
 423                        if (list[ref]->hash == list[src]->hash)
 424                                continue;
 425
 426                        if (src != dst)
 427                                list[dst] = list[src];
 428
 429                        dst++;
 430                }
 431                island_count = dst;
 432        }
 433
 434        island_bitmap_size = (island_count / 32) + 1;
 435        core = get_core_island();
 436
 437        for (i = 0; i < island_count; ++i) {
 438                mark_remote_island_1(list[i], core && list[i]->hash == core->hash);
 439        }
 440
 441        free(list);
 442}
 443
 444void load_delta_islands(void)
 445{
 446        island_marks = kh_init_sha1();
 447        remote_islands = kh_init_str();
 448
 449        git_config(island_config_callback, NULL);
 450        for_each_ref(find_island_for_ref, NULL);
 451        deduplicate_islands();
 452
 453        fprintf(stderr, _("Marked %d islands, done.\n"), island_counter);
 454}
 455
 456void propagate_island_marks(struct commit *commit)
 457{
 458        khiter_t pos = kh_get_sha1(island_marks, commit->object.oid.hash);
 459
 460        if (pos < kh_end(island_marks)) {
 461                struct commit_list *p;
 462                struct island_bitmap *root_marks = kh_value(island_marks, pos);
 463
 464                parse_commit(commit);
 465                set_island_marks(&get_commit_tree(commit)->object, root_marks);
 466                for (p = commit->parents; p; p = p->next)
 467                        set_island_marks(&p->item->object, root_marks);
 468        }
 469}
 470
 471int compute_pack_layers(struct packing_data *to_pack)
 472{
 473        uint32_t i;
 474
 475        if (!core_island_name || !island_marks)
 476                return 1;
 477
 478        for (i = 0; i < to_pack->nr_objects; ++i) {
 479                struct object_entry *entry = &to_pack->objects[i];
 480                khiter_t pos = kh_get_sha1(island_marks, entry->idx.oid.hash);
 481
 482                entry->layer = 1;
 483
 484                if (pos < kh_end(island_marks)) {
 485                        struct island_bitmap *bitmap = kh_value(island_marks, pos);
 486
 487                        if (island_bitmap_get(bitmap, island_counter_core))
 488                                entry->layer = 0;
 489                }
 490        }
 491
 492        return 2;
 493}