delta-islands.con commit general improvements (43abf13)
   1#include "cache.h"
   2#include "attr.h"
   3#include "object.h"
   4#include "blob.h"
   5#include "commit.h"
   6#include "tag.h"
   7#include "tree.h"
   8#include "delta.h"
   9#include "pack.h"
  10#include "tree-walk.h"
  11#include "diff.h"
  12#include "revision.h"
  13#include "list-objects.h"
  14#include "progress.h"
  15#include "refs.h"
  16#include "khash.h"
  17#include "pack-bitmap.h"
  18#include "pack-objects.h"
  19#include "delta-islands.h"
  20#include "sha1-array.h"
  21#include "config.h"
  22
  23KHASH_INIT(str, const char *, void *, 1, kh_str_hash_func, kh_str_hash_equal)
  24
  25static kh_oid_map_t *island_marks;
  26static unsigned island_counter;
  27static unsigned island_counter_core;
  28
  29static kh_str_t *remote_islands;
  30
  31struct remote_island {
  32        uint64_t hash;
  33        struct oid_array oids;
  34};
  35
  36struct island_bitmap {
  37        uint32_t refcount;
  38        uint32_t bits[FLEX_ARRAY];
  39};
  40
  41static uint32_t island_bitmap_size;
  42
  43/*
  44 * Allocate a new bitmap; if "old" is not NULL, the new bitmap will be a copy
  45 * of "old". Otherwise, the new bitmap is empty.
  46 */
  47static struct island_bitmap *island_bitmap_new(const struct island_bitmap *old)
  48{
  49        size_t size = sizeof(struct island_bitmap) + (island_bitmap_size * 4);
  50        struct island_bitmap *b = xcalloc(1, size);
  51
  52        if (old)
  53                memcpy(b, old, size);
  54
  55        b->refcount = 1;
  56        return b;
  57}
  58
  59static void island_bitmap_or(struct island_bitmap *a, const struct island_bitmap *b)
  60{
  61        uint32_t i;
  62
  63        for (i = 0; i < island_bitmap_size; ++i)
  64                a->bits[i] |= b->bits[i];
  65}
  66
  67static int island_bitmap_is_subset(struct island_bitmap *self,
  68                struct island_bitmap *super)
  69{
  70        uint32_t i;
  71
  72        if (self == super)
  73                return 1;
  74
  75        for (i = 0; i < island_bitmap_size; ++i) {
  76                if ((self->bits[i] & super->bits[i]) != self->bits[i])
  77                        return 0;
  78        }
  79
  80        return 1;
  81}
  82
  83#define ISLAND_BITMAP_BLOCK(x) (x / 32)
  84#define ISLAND_BITMAP_MASK(x) (1 << (x % 32))
  85
  86static void island_bitmap_set(struct island_bitmap *self, uint32_t i)
  87{
  88        self->bits[ISLAND_BITMAP_BLOCK(i)] |= ISLAND_BITMAP_MASK(i);
  89}
  90
  91static int island_bitmap_get(struct island_bitmap *self, uint32_t i)
  92{
  93        return (self->bits[ISLAND_BITMAP_BLOCK(i)] & ISLAND_BITMAP_MASK(i)) != 0;
  94}
  95
  96int in_same_island(const struct object_id *trg_oid, const struct object_id *src_oid)
  97{
  98        khiter_t trg_pos, src_pos;
  99
 100        /* If we aren't using islands, assume everything goes together. */
 101        if (!island_marks)
 102                return 1;
 103
 104        /*
 105         * If we don't have a bitmap for the target, we can delta it
 106         * against anything -- it's not an important object
 107         */
 108        trg_pos = kh_get_oid_map(island_marks, *trg_oid);
 109        if (trg_pos >= kh_end(island_marks))
 110                return 1;
 111
 112        /*
 113         * if the source (our delta base) doesn't have a bitmap,
 114         * we don't want to base any deltas on it!
 115         */
 116        src_pos = kh_get_oid_map(island_marks, *src_oid);
 117        if (src_pos >= kh_end(island_marks))
 118                return 0;
 119
 120        return island_bitmap_is_subset(kh_value(island_marks, trg_pos),
 121                                kh_value(island_marks, src_pos));
 122}
 123
 124int island_delta_cmp(const struct object_id *a, const struct object_id *b)
 125{
 126        khiter_t a_pos, b_pos;
 127        struct island_bitmap *a_bitmap = NULL, *b_bitmap = NULL;
 128
 129        if (!island_marks)
 130                return 0;
 131
 132        a_pos = kh_get_oid_map(island_marks, *a);
 133        if (a_pos < kh_end(island_marks))
 134                a_bitmap = kh_value(island_marks, a_pos);
 135
 136        b_pos = kh_get_oid_map(island_marks, *b);
 137        if (b_pos < kh_end(island_marks))
 138                b_bitmap = kh_value(island_marks, b_pos);
 139
 140        if (a_bitmap) {
 141                if (!b_bitmap || !island_bitmap_is_subset(a_bitmap, b_bitmap))
 142                        return -1;
 143        }
 144        if (b_bitmap) {
 145                if (!a_bitmap || !island_bitmap_is_subset(b_bitmap, a_bitmap))
 146                        return 1;
 147        }
 148
 149        return 0;
 150}
 151
 152static struct island_bitmap *create_or_get_island_marks(struct object *obj)
 153{
 154        khiter_t pos;
 155        int hash_ret;
 156
 157        pos = kh_put_oid_map(island_marks, obj->oid, &hash_ret);
 158        if (hash_ret)
 159                kh_value(island_marks, pos) = island_bitmap_new(NULL);
 160
 161        return kh_value(island_marks, pos);
 162}
 163
 164static void set_island_marks(struct object *obj, struct island_bitmap *marks)
 165{
 166        struct island_bitmap *b;
 167        khiter_t pos;
 168        int hash_ret;
 169
 170        pos = kh_put_oid_map(island_marks, obj->oid, &hash_ret);
 171        if (hash_ret) {
 172                /*
 173                 * We don't have one yet; make a copy-on-write of the
 174                 * parent.
 175                 */
 176                marks->refcount++;
 177                kh_value(island_marks, pos) = marks;
 178                return;
 179        }
 180
 181        /*
 182         * We do have it. Make sure we split any copy-on-write before
 183         * updating.
 184         */
 185        b = kh_value(island_marks, pos);
 186        if (b->refcount > 1) {
 187                b->refcount--;
 188                b = kh_value(island_marks, pos) = island_bitmap_new(b);
 189        }
 190        island_bitmap_or(b, marks);
 191}
 192
 193static void mark_remote_island_1(struct repository *r,
 194                                 struct remote_island *rl,
 195                                 int is_core_island)
 196{
 197        uint32_t i;
 198
 199        for (i = 0; i < rl->oids.nr; ++i) {
 200                struct island_bitmap *marks;
 201                struct object *obj = parse_object(r, &rl->oids.oid[i]);
 202
 203                if (!obj)
 204                        continue;
 205
 206                marks = create_or_get_island_marks(obj);
 207                island_bitmap_set(marks, island_counter);
 208
 209                if (is_core_island && obj->type == OBJ_COMMIT)
 210                        obj->flags |= NEEDS_BITMAP;
 211
 212                /* If it was a tag, also make sure we hit the underlying object. */
 213                while (obj && obj->type == OBJ_TAG) {
 214                        obj = ((struct tag *)obj)->tagged;
 215                        if (obj) {
 216                                parse_object(r, &obj->oid);
 217                                marks = create_or_get_island_marks(obj);
 218                                island_bitmap_set(marks, island_counter);
 219                        }
 220                }
 221        }
 222
 223        if (is_core_island)
 224                island_counter_core = island_counter;
 225
 226        island_counter++;
 227}
 228
 229struct tree_islands_todo {
 230        struct object_entry *entry;
 231        unsigned int depth;
 232};
 233
 234static int tree_depth_compare(const void *a, const void *b)
 235{
 236        const struct tree_islands_todo *todo_a = a;
 237        const struct tree_islands_todo *todo_b = b;
 238
 239        return todo_a->depth - todo_b->depth;
 240}
 241
 242void resolve_tree_islands(struct repository *r,
 243                          int progress,
 244                          struct packing_data *to_pack)
 245{
 246        struct progress *progress_state = NULL;
 247        struct tree_islands_todo *todo;
 248        int nr = 0;
 249        int i;
 250
 251        if (!island_marks)
 252                return;
 253
 254        /*
 255         * We process only trees, as commits and tags have already been handled
 256         * (and passed their marks on to root trees, as well. We must make sure
 257         * to process them in descending tree-depth order so that marks
 258         * propagate down the tree properly, even if a sub-tree is found in
 259         * multiple parent trees.
 260         */
 261        ALLOC_ARRAY(todo, to_pack->nr_objects);
 262        for (i = 0; i < to_pack->nr_objects; i++) {
 263                if (oe_type(&to_pack->objects[i]) == OBJ_TREE) {
 264                        todo[nr].entry = &to_pack->objects[i];
 265                        todo[nr].depth = oe_tree_depth(to_pack, &to_pack->objects[i]);
 266                        nr++;
 267                }
 268        }
 269        QSORT(todo, nr, tree_depth_compare);
 270
 271        if (progress)
 272                progress_state = start_progress(_("Propagating island marks"), nr);
 273
 274        for (i = 0; i < nr; i++) {
 275                struct object_entry *ent = todo[i].entry;
 276                struct island_bitmap *root_marks;
 277                struct tree *tree;
 278                struct tree_desc desc;
 279                struct name_entry entry;
 280                khiter_t pos;
 281
 282                pos = kh_get_oid_map(island_marks, ent->idx.oid);
 283                if (pos >= kh_end(island_marks))
 284                        continue;
 285
 286                root_marks = kh_value(island_marks, pos);
 287
 288                tree = lookup_tree(r, &ent->idx.oid);
 289                if (!tree || parse_tree(tree) < 0)
 290                        die(_("bad tree object %s"), oid_to_hex(&ent->idx.oid));
 291
 292                init_tree_desc(&desc, tree->buffer, tree->size);
 293                while (tree_entry(&desc, &entry)) {
 294                        struct object *obj;
 295
 296                        if (S_ISGITLINK(entry.mode))
 297                                continue;
 298
 299                        obj = lookup_object(r, &entry.oid);
 300                        if (!obj)
 301                                continue;
 302
 303                        set_island_marks(obj, root_marks);
 304                }
 305
 306                free_tree_buffer(tree);
 307
 308                display_progress(progress_state, i+1);
 309        }
 310
 311        stop_progress(&progress_state);
 312        free(todo);
 313}
 314
 315static regex_t *island_regexes;
 316static unsigned int island_regexes_alloc, island_regexes_nr;
 317static const char *core_island_name;
 318
 319static int island_config_callback(const char *k, const char *v, void *cb)
 320{
 321        if (!strcmp(k, "pack.island")) {
 322                struct strbuf re = STRBUF_INIT;
 323
 324                if (!v)
 325                        return config_error_nonbool(k);
 326
 327                ALLOC_GROW(island_regexes, island_regexes_nr + 1, island_regexes_alloc);
 328
 329                if (*v != '^')
 330                        strbuf_addch(&re, '^');
 331                strbuf_addstr(&re, v);
 332
 333                if (regcomp(&island_regexes[island_regexes_nr], re.buf, REG_EXTENDED))
 334                        die(_("failed to load island regex for '%s': %s"), k, re.buf);
 335
 336                strbuf_release(&re);
 337                island_regexes_nr++;
 338                return 0;
 339        }
 340
 341        if (!strcmp(k, "pack.islandcore"))
 342                return git_config_string(&core_island_name, k, v);
 343
 344        return 0;
 345}
 346
 347static void add_ref_to_island(const char *island_name, const struct object_id *oid)
 348{
 349        uint64_t sha_core;
 350        struct remote_island *rl = NULL;
 351
 352        int hash_ret;
 353        khiter_t pos = kh_put_str(remote_islands, island_name, &hash_ret);
 354
 355        if (hash_ret) {
 356                kh_key(remote_islands, pos) = xstrdup(island_name);
 357                kh_value(remote_islands, pos) = xcalloc(1, sizeof(struct remote_island));
 358        }
 359
 360        rl = kh_value(remote_islands, pos);
 361        oid_array_append(&rl->oids, oid);
 362
 363        memcpy(&sha_core, oid->hash, sizeof(uint64_t));
 364        rl->hash += sha_core;
 365}
 366
 367static int find_island_for_ref(const char *refname, const struct object_id *oid,
 368                               int flags, void *data)
 369{
 370        /*
 371         * We should advertise 'ARRAY_SIZE(matches) - 2' as the max,
 372         * so we can diagnose below a config with more capture groups
 373         * than we support.
 374         */
 375        regmatch_t matches[16];
 376        int i, m;
 377        struct strbuf island_name = STRBUF_INIT;
 378
 379        /* walk backwards to get last-one-wins ordering */
 380        for (i = island_regexes_nr - 1; i >= 0; i--) {
 381                if (!regexec(&island_regexes[i], refname,
 382                             ARRAY_SIZE(matches), matches, 0))
 383                        break;
 384        }
 385
 386        if (i < 0)
 387                return 0;
 388
 389        if (matches[ARRAY_SIZE(matches) - 1].rm_so != -1)
 390                warning(_("island regex from config has "
 391                          "too many capture groups (max=%d)"),
 392                        (int)ARRAY_SIZE(matches) - 2);
 393
 394        for (m = 1; m < ARRAY_SIZE(matches); m++) {
 395                regmatch_t *match = &matches[m];
 396
 397                if (match->rm_so == -1)
 398                        continue;
 399
 400                if (island_name.len)
 401                        strbuf_addch(&island_name, '-');
 402
 403                strbuf_add(&island_name, refname + match->rm_so, match->rm_eo - match->rm_so);
 404        }
 405
 406        add_ref_to_island(island_name.buf, oid);
 407        strbuf_release(&island_name);
 408        return 0;
 409}
 410
 411static struct remote_island *get_core_island(void)
 412{
 413        if (core_island_name) {
 414                khiter_t pos = kh_get_str(remote_islands, core_island_name);
 415                if (pos < kh_end(remote_islands))
 416                        return kh_value(remote_islands, pos);
 417        }
 418
 419        return NULL;
 420}
 421
 422static void deduplicate_islands(struct repository *r)
 423{
 424        struct remote_island *island, *core = NULL, **list;
 425        unsigned int island_count, dst, src, ref, i = 0;
 426
 427        island_count = kh_size(remote_islands);
 428        ALLOC_ARRAY(list, island_count);
 429
 430        kh_foreach_value(remote_islands, island, {
 431                list[i++] = island;
 432        });
 433
 434        for (ref = 0; ref + 1 < island_count; ref++) {
 435                for (src = ref + 1, dst = src; src < island_count; src++) {
 436                        if (list[ref]->hash == list[src]->hash)
 437                                continue;
 438
 439                        if (src != dst)
 440                                list[dst] = list[src];
 441
 442                        dst++;
 443                }
 444                island_count = dst;
 445        }
 446
 447        island_bitmap_size = (island_count / 32) + 1;
 448        core = get_core_island();
 449
 450        for (i = 0; i < island_count; ++i) {
 451                mark_remote_island_1(r, list[i], core && list[i]->hash == core->hash);
 452        }
 453
 454        free(list);
 455}
 456
 457void load_delta_islands(struct repository *r, int progress)
 458{
 459        island_marks = kh_init_oid_map();
 460        remote_islands = kh_init_str();
 461
 462        git_config(island_config_callback, NULL);
 463        for_each_ref(find_island_for_ref, NULL);
 464        deduplicate_islands(r);
 465
 466        if (progress)
 467                fprintf(stderr, _("Marked %d islands, done.\n"), island_counter);
 468}
 469
 470void propagate_island_marks(struct commit *commit)
 471{
 472        khiter_t pos = kh_get_oid_map(island_marks, commit->object.oid);
 473
 474        if (pos < kh_end(island_marks)) {
 475                struct commit_list *p;
 476                struct island_bitmap *root_marks = kh_value(island_marks, pos);
 477
 478                parse_commit(commit);
 479                set_island_marks(&get_commit_tree(commit)->object, root_marks);
 480                for (p = commit->parents; p; p = p->next)
 481                        set_island_marks(&p->item->object, root_marks);
 482        }
 483}
 484
 485int compute_pack_layers(struct packing_data *to_pack)
 486{
 487        uint32_t i;
 488
 489        if (!core_island_name || !island_marks)
 490                return 1;
 491
 492        for (i = 0; i < to_pack->nr_objects; ++i) {
 493                struct object_entry *entry = &to_pack->objects[i];
 494                khiter_t pos = kh_get_oid_map(island_marks, entry->idx.oid);
 495
 496                oe_set_layer(to_pack, entry, 1);
 497
 498                if (pos < kh_end(island_marks)) {
 499                        struct island_bitmap *bitmap = kh_value(island_marks, pos);
 500
 501                        if (island_bitmap_get(bitmap, island_counter_core))
 502                                oe_set_layer(to_pack, entry, 0);
 503                }
 504        }
 505
 506        return 2;
 507}