f8201d888b9a26299b3c7b0eb7cc48ad546dd7f2
   1#include "cache.h"
   2#include "config.h"
   3#include "dir.h"
   4#include "git-compat-util.h"
   5#include "lockfile.h"
   6#include "pack.h"
   7#include "packfile.h"
   8#include "commit.h"
   9#include "object.h"
  10#include "refs.h"
  11#include "revision.h"
  12#include "sha1-lookup.h"
  13#include "commit-graph.h"
  14#include "object-store.h"
  15#include "alloc.h"
  16#include "hashmap.h"
  17#include "replace-object.h"
  18#include "progress.h"
  19
  20#define GRAPH_SIGNATURE 0x43475048 /* "CGPH" */
  21#define GRAPH_CHUNKID_OIDFANOUT 0x4f494446 /* "OIDF" */
  22#define GRAPH_CHUNKID_OIDLOOKUP 0x4f49444c /* "OIDL" */
  23#define GRAPH_CHUNKID_DATA 0x43444154 /* "CDAT" */
  24#define GRAPH_CHUNKID_EXTRAEDGES 0x45444745 /* "EDGE" */
  25
  26#define GRAPH_DATA_WIDTH (the_hash_algo->rawsz + 16)
  27
  28#define GRAPH_VERSION_1 0x1
  29#define GRAPH_VERSION GRAPH_VERSION_1
  30
  31#define GRAPH_EXTRA_EDGES_NEEDED 0x80000000
  32#define GRAPH_EDGE_LAST_MASK 0x7fffffff
  33#define GRAPH_PARENT_NONE 0x70000000
  34
  35#define GRAPH_LAST_EDGE 0x80000000
  36
  37#define GRAPH_HEADER_SIZE 8
  38#define GRAPH_FANOUT_SIZE (4 * 256)
  39#define GRAPH_CHUNKLOOKUP_WIDTH 12
  40#define GRAPH_MIN_SIZE (GRAPH_HEADER_SIZE + 4 * GRAPH_CHUNKLOOKUP_WIDTH \
  41                        + GRAPH_FANOUT_SIZE + the_hash_algo->rawsz)
  42
  43char *get_commit_graph_filename(const char *obj_dir)
  44{
  45        return xstrfmt("%s/info/commit-graph", obj_dir);
  46}
  47
  48static uint8_t oid_version(void)
  49{
  50        return 1;
  51}
  52
  53static struct commit_graph *alloc_commit_graph(void)
  54{
  55        struct commit_graph *g = xcalloc(1, sizeof(*g));
  56        g->graph_fd = -1;
  57
  58        return g;
  59}
  60
  61extern int read_replace_refs;
  62
  63static int commit_graph_compatible(struct repository *r)
  64{
  65        if (!r->gitdir)
  66                return 0;
  67
  68        if (read_replace_refs) {
  69                prepare_replace_object(r);
  70                if (hashmap_get_size(&r->objects->replace_map->map))
  71                        return 0;
  72        }
  73
  74        prepare_commit_graft(r);
  75        if (r->parsed_objects && r->parsed_objects->grafts_nr)
  76                return 0;
  77        if (is_repository_shallow(r))
  78                return 0;
  79
  80        return 1;
  81}
  82
  83struct commit_graph *load_commit_graph_one(const char *graph_file)
  84{
  85        void *graph_map;
  86        size_t graph_size;
  87        struct stat st;
  88        struct commit_graph *ret;
  89        int fd = git_open(graph_file);
  90
  91        if (fd < 0)
  92                return NULL;
  93        if (fstat(fd, &st)) {
  94                close(fd);
  95                return NULL;
  96        }
  97        graph_size = xsize_t(st.st_size);
  98
  99        if (graph_size < GRAPH_MIN_SIZE) {
 100                close(fd);
 101                die(_("graph file %s is too small"), graph_file);
 102        }
 103        graph_map = xmmap(NULL, graph_size, PROT_READ, MAP_PRIVATE, fd, 0);
 104        ret = parse_commit_graph(graph_map, fd, graph_size);
 105
 106        if (!ret) {
 107                munmap(graph_map, graph_size);
 108                close(fd);
 109                exit(1);
 110        }
 111
 112        return ret;
 113}
 114
 115static int verify_commit_graph_lite(struct commit_graph *g)
 116{
 117        /*
 118         * Basic validation shared between parse_commit_graph()
 119         * which'll be called every time the graph is used, and the
 120         * much more expensive verify_commit_graph() used by
 121         * "commit-graph verify".
 122         *
 123         * There should only be very basic checks here to ensure that
 124         * we don't e.g. segfault in fill_commit_in_graph(), but
 125         * because this is a very hot codepath nothing that e.g. loops
 126         * over g->num_commits, or runs a checksum on the commit-graph
 127         * itself.
 128         */
 129        if (!g->chunk_oid_fanout) {
 130                error("commit-graph is missing the OID Fanout chunk");
 131                return 1;
 132        }
 133        if (!g->chunk_oid_lookup) {
 134                error("commit-graph is missing the OID Lookup chunk");
 135                return 1;
 136        }
 137        if (!g->chunk_commit_data) {
 138                error("commit-graph is missing the Commit Data chunk");
 139                return 1;
 140        }
 141
 142        return 0;
 143}
 144
 145struct commit_graph *parse_commit_graph(void *graph_map, int fd,
 146                                        size_t graph_size)
 147{
 148        const unsigned char *data, *chunk_lookup;
 149        uint32_t i;
 150        struct commit_graph *graph;
 151        uint64_t last_chunk_offset;
 152        uint32_t last_chunk_id;
 153        uint32_t graph_signature;
 154        unsigned char graph_version, hash_version;
 155
 156        if (!graph_map)
 157                return NULL;
 158
 159        if (graph_size < GRAPH_MIN_SIZE)
 160                return NULL;
 161
 162        data = (const unsigned char *)graph_map;
 163
 164        graph_signature = get_be32(data);
 165        if (graph_signature != GRAPH_SIGNATURE) {
 166                error(_("graph signature %X does not match signature %X"),
 167                      graph_signature, GRAPH_SIGNATURE);
 168                return NULL;
 169        }
 170
 171        graph_version = *(unsigned char*)(data + 4);
 172        if (graph_version != GRAPH_VERSION) {
 173                error(_("graph version %X does not match version %X"),
 174                      graph_version, GRAPH_VERSION);
 175                return NULL;
 176        }
 177
 178        hash_version = *(unsigned char*)(data + 5);
 179        if (hash_version != oid_version()) {
 180                error(_("hash version %X does not match version %X"),
 181                      hash_version, oid_version());
 182                return NULL;
 183        }
 184
 185        graph = alloc_commit_graph();
 186
 187        graph->hash_len = the_hash_algo->rawsz;
 188        graph->num_chunks = *(unsigned char*)(data + 6);
 189        graph->graph_fd = fd;
 190        graph->data = graph_map;
 191        graph->data_len = graph_size;
 192
 193        last_chunk_id = 0;
 194        last_chunk_offset = 8;
 195        chunk_lookup = data + 8;
 196        for (i = 0; i < graph->num_chunks; i++) {
 197                uint32_t chunk_id;
 198                uint64_t chunk_offset;
 199                int chunk_repeated = 0;
 200
 201                if (data + graph_size - chunk_lookup <
 202                    GRAPH_CHUNKLOOKUP_WIDTH) {
 203                        error(_("chunk lookup table entry missing; graph file may be incomplete"));
 204                        free(graph);
 205                        return NULL;
 206                }
 207
 208                chunk_id = get_be32(chunk_lookup + 0);
 209                chunk_offset = get_be64(chunk_lookup + 4);
 210
 211                chunk_lookup += GRAPH_CHUNKLOOKUP_WIDTH;
 212
 213                if (chunk_offset > graph_size - the_hash_algo->rawsz) {
 214                        error(_("improper chunk offset %08x%08x"), (uint32_t)(chunk_offset >> 32),
 215                              (uint32_t)chunk_offset);
 216                        free(graph);
 217                        return NULL;
 218                }
 219
 220                switch (chunk_id) {
 221                case GRAPH_CHUNKID_OIDFANOUT:
 222                        if (graph->chunk_oid_fanout)
 223                                chunk_repeated = 1;
 224                        else
 225                                graph->chunk_oid_fanout = (uint32_t*)(data + chunk_offset);
 226                        break;
 227
 228                case GRAPH_CHUNKID_OIDLOOKUP:
 229                        if (graph->chunk_oid_lookup)
 230                                chunk_repeated = 1;
 231                        else
 232                                graph->chunk_oid_lookup = data + chunk_offset;
 233                        break;
 234
 235                case GRAPH_CHUNKID_DATA:
 236                        if (graph->chunk_commit_data)
 237                                chunk_repeated = 1;
 238                        else
 239                                graph->chunk_commit_data = data + chunk_offset;
 240                        break;
 241
 242                case GRAPH_CHUNKID_EXTRAEDGES:
 243                        if (graph->chunk_extra_edges)
 244                                chunk_repeated = 1;
 245                        else
 246                                graph->chunk_extra_edges = data + chunk_offset;
 247                        break;
 248                }
 249
 250                if (chunk_repeated) {
 251                        error(_("chunk id %08x appears multiple times"), chunk_id);
 252                        free(graph);
 253                        return NULL;
 254                }
 255
 256                if (last_chunk_id == GRAPH_CHUNKID_OIDLOOKUP)
 257                {
 258                        graph->num_commits = (chunk_offset - last_chunk_offset)
 259                                             / graph->hash_len;
 260                }
 261
 262                last_chunk_id = chunk_id;
 263                last_chunk_offset = chunk_offset;
 264        }
 265
 266        if (verify_commit_graph_lite(graph))
 267                return NULL;
 268
 269        return graph;
 270}
 271
 272static void prepare_commit_graph_one(struct repository *r, const char *obj_dir)
 273{
 274        char *graph_name;
 275
 276        if (r->objects->commit_graph)
 277                return;
 278
 279        graph_name = get_commit_graph_filename(obj_dir);
 280        r->objects->commit_graph =
 281                load_commit_graph_one(graph_name);
 282
 283        FREE_AND_NULL(graph_name);
 284}
 285
 286/*
 287 * Return 1 if commit_graph is non-NULL, and 0 otherwise.
 288 *
 289 * On the first invocation, this function attemps to load the commit
 290 * graph if the_repository is configured to have one.
 291 */
 292static int prepare_commit_graph(struct repository *r)
 293{
 294        struct object_directory *odb;
 295        int config_value;
 296
 297        if (r->objects->commit_graph_attempted)
 298                return !!r->objects->commit_graph;
 299        r->objects->commit_graph_attempted = 1;
 300
 301        if (!git_env_bool(GIT_TEST_COMMIT_GRAPH, 0) &&
 302            (repo_config_get_bool(r, "core.commitgraph", &config_value) ||
 303            !config_value))
 304                /*
 305                 * This repository is not configured to use commit graphs, so
 306                 * do not load one. (But report commit_graph_attempted anyway
 307                 * so that commit graph loading is not attempted again for this
 308                 * repository.)
 309                 */
 310                return 0;
 311
 312        if (!commit_graph_compatible(r))
 313                return 0;
 314
 315        prepare_alt_odb(r);
 316        for (odb = r->objects->odb;
 317             !r->objects->commit_graph && odb;
 318             odb = odb->next)
 319                prepare_commit_graph_one(r, odb->path);
 320        return !!r->objects->commit_graph;
 321}
 322
 323int generation_numbers_enabled(struct repository *r)
 324{
 325        uint32_t first_generation;
 326        struct commit_graph *g;
 327        if (!prepare_commit_graph(r))
 328               return 0;
 329
 330        g = r->objects->commit_graph;
 331
 332        if (!g->num_commits)
 333                return 0;
 334
 335        first_generation = get_be32(g->chunk_commit_data +
 336                                    g->hash_len + 8) >> 2;
 337
 338        return !!first_generation;
 339}
 340
 341void close_commit_graph(struct repository *r)
 342{
 343        free_commit_graph(r->objects->commit_graph);
 344        r->objects->commit_graph = NULL;
 345}
 346
 347static int bsearch_graph(struct commit_graph *g, struct object_id *oid, uint32_t *pos)
 348{
 349        return bsearch_hash(oid->hash, g->chunk_oid_fanout,
 350                            g->chunk_oid_lookup, g->hash_len, pos);
 351}
 352
 353static struct commit_list **insert_parent_or_die(struct repository *r,
 354                                                 struct commit_graph *g,
 355                                                 uint64_t pos,
 356                                                 struct commit_list **pptr)
 357{
 358        struct commit *c;
 359        struct object_id oid;
 360
 361        if (pos >= g->num_commits)
 362                die("invalid parent position %"PRIu64, pos);
 363
 364        hashcpy(oid.hash, g->chunk_oid_lookup + g->hash_len * pos);
 365        c = lookup_commit(r, &oid);
 366        if (!c)
 367                die(_("could not find commit %s"), oid_to_hex(&oid));
 368        c->graph_pos = pos;
 369        return &commit_list_insert(c, pptr)->next;
 370}
 371
 372static void fill_commit_graph_info(struct commit *item, struct commit_graph *g, uint32_t pos)
 373{
 374        const unsigned char *commit_data = g->chunk_commit_data + GRAPH_DATA_WIDTH * pos;
 375        item->graph_pos = pos;
 376        item->generation = get_be32(commit_data + g->hash_len + 8) >> 2;
 377}
 378
 379static int fill_commit_in_graph(struct repository *r,
 380                                struct commit *item,
 381                                struct commit_graph *g, uint32_t pos)
 382{
 383        uint32_t edge_value;
 384        uint32_t *parent_data_ptr;
 385        uint64_t date_low, date_high;
 386        struct commit_list **pptr;
 387        const unsigned char *commit_data = g->chunk_commit_data + (g->hash_len + 16) * pos;
 388
 389        item->object.parsed = 1;
 390        item->graph_pos = pos;
 391
 392        item->maybe_tree = NULL;
 393
 394        date_high = get_be32(commit_data + g->hash_len + 8) & 0x3;
 395        date_low = get_be32(commit_data + g->hash_len + 12);
 396        item->date = (timestamp_t)((date_high << 32) | date_low);
 397
 398        item->generation = get_be32(commit_data + g->hash_len + 8) >> 2;
 399
 400        pptr = &item->parents;
 401
 402        edge_value = get_be32(commit_data + g->hash_len);
 403        if (edge_value == GRAPH_PARENT_NONE)
 404                return 1;
 405        pptr = insert_parent_or_die(r, g, edge_value, pptr);
 406
 407        edge_value = get_be32(commit_data + g->hash_len + 4);
 408        if (edge_value == GRAPH_PARENT_NONE)
 409                return 1;
 410        if (!(edge_value & GRAPH_EXTRA_EDGES_NEEDED)) {
 411                pptr = insert_parent_or_die(r, g, edge_value, pptr);
 412                return 1;
 413        }
 414
 415        parent_data_ptr = (uint32_t*)(g->chunk_extra_edges +
 416                          4 * (uint64_t)(edge_value & GRAPH_EDGE_LAST_MASK));
 417        do {
 418                edge_value = get_be32(parent_data_ptr);
 419                pptr = insert_parent_or_die(r, g,
 420                                            edge_value & GRAPH_EDGE_LAST_MASK,
 421                                            pptr);
 422                parent_data_ptr++;
 423        } while (!(edge_value & GRAPH_LAST_EDGE));
 424
 425        return 1;
 426}
 427
 428static int find_commit_in_graph(struct commit *item, struct commit_graph *g, uint32_t *pos)
 429{
 430        if (item->graph_pos != COMMIT_NOT_FROM_GRAPH) {
 431                *pos = item->graph_pos;
 432                return 1;
 433        } else {
 434                return bsearch_graph(g, &(item->object.oid), pos);
 435        }
 436}
 437
 438static int parse_commit_in_graph_one(struct repository *r,
 439                                     struct commit_graph *g,
 440                                     struct commit *item)
 441{
 442        uint32_t pos;
 443
 444        if (item->object.parsed)
 445                return 1;
 446
 447        if (find_commit_in_graph(item, g, &pos))
 448                return fill_commit_in_graph(r, item, g, pos);
 449
 450        return 0;
 451}
 452
 453int parse_commit_in_graph(struct repository *r, struct commit *item)
 454{
 455        if (!prepare_commit_graph(r))
 456                return 0;
 457        return parse_commit_in_graph_one(r, r->objects->commit_graph, item);
 458}
 459
 460void load_commit_graph_info(struct repository *r, struct commit *item)
 461{
 462        uint32_t pos;
 463        if (!prepare_commit_graph(r))
 464                return;
 465        if (find_commit_in_graph(item, r->objects->commit_graph, &pos))
 466                fill_commit_graph_info(item, r->objects->commit_graph, pos);
 467}
 468
 469static struct tree *load_tree_for_commit(struct repository *r,
 470                                         struct commit_graph *g,
 471                                         struct commit *c)
 472{
 473        struct object_id oid;
 474        const unsigned char *commit_data = g->chunk_commit_data +
 475                                           GRAPH_DATA_WIDTH * (c->graph_pos);
 476
 477        hashcpy(oid.hash, commit_data);
 478        c->maybe_tree = lookup_tree(r, &oid);
 479
 480        return c->maybe_tree;
 481}
 482
 483static struct tree *get_commit_tree_in_graph_one(struct repository *r,
 484                                                 struct commit_graph *g,
 485                                                 const struct commit *c)
 486{
 487        if (c->maybe_tree)
 488                return c->maybe_tree;
 489        if (c->graph_pos == COMMIT_NOT_FROM_GRAPH)
 490                BUG("get_commit_tree_in_graph_one called from non-commit-graph commit");
 491
 492        return load_tree_for_commit(r, g, (struct commit *)c);
 493}
 494
 495struct tree *get_commit_tree_in_graph(struct repository *r, const struct commit *c)
 496{
 497        return get_commit_tree_in_graph_one(r, r->objects->commit_graph, c);
 498}
 499
 500static void write_graph_chunk_fanout(struct hashfile *f,
 501                                     struct commit **commits,
 502                                     int nr_commits,
 503                                     struct progress *progress,
 504                                     uint64_t *progress_cnt)
 505{
 506        int i, count = 0;
 507        struct commit **list = commits;
 508
 509        /*
 510         * Write the first-level table (the list is sorted,
 511         * but we use a 256-entry lookup to be able to avoid
 512         * having to do eight extra binary search iterations).
 513         */
 514        for (i = 0; i < 256; i++) {
 515                while (count < nr_commits) {
 516                        if ((*list)->object.oid.hash[0] != i)
 517                                break;
 518                        display_progress(progress, ++*progress_cnt);
 519                        count++;
 520                        list++;
 521                }
 522
 523                hashwrite_be32(f, count);
 524        }
 525}
 526
 527static void write_graph_chunk_oids(struct hashfile *f, int hash_len,
 528                                   struct commit **commits, int nr_commits,
 529                                   struct progress *progress,
 530                                   uint64_t *progress_cnt)
 531{
 532        struct commit **list = commits;
 533        int count;
 534        for (count = 0; count < nr_commits; count++, list++) {
 535                display_progress(progress, ++*progress_cnt);
 536                hashwrite(f, (*list)->object.oid.hash, (int)hash_len);
 537        }
 538}
 539
 540static const unsigned char *commit_to_sha1(size_t index, void *table)
 541{
 542        struct commit **commits = table;
 543        return commits[index]->object.oid.hash;
 544}
 545
 546static void write_graph_chunk_data(struct hashfile *f, int hash_len,
 547                                   struct commit **commits, int nr_commits,
 548                                   struct progress *progress,
 549                                   uint64_t *progress_cnt)
 550{
 551        struct commit **list = commits;
 552        struct commit **last = commits + nr_commits;
 553        uint32_t num_extra_edges = 0;
 554
 555        while (list < last) {
 556                struct commit_list *parent;
 557                int edge_value;
 558                uint32_t packedDate[2];
 559                display_progress(progress, ++*progress_cnt);
 560
 561                parse_commit(*list);
 562                hashwrite(f, get_commit_tree_oid(*list)->hash, hash_len);
 563
 564                parent = (*list)->parents;
 565
 566                if (!parent)
 567                        edge_value = GRAPH_PARENT_NONE;
 568                else {
 569                        edge_value = sha1_pos(parent->item->object.oid.hash,
 570                                              commits,
 571                                              nr_commits,
 572                                              commit_to_sha1);
 573
 574                        if (edge_value < 0)
 575                                BUG("missing parent %s for commit %s",
 576                                    oid_to_hex(&parent->item->object.oid),
 577                                    oid_to_hex(&(*list)->object.oid));
 578                }
 579
 580                hashwrite_be32(f, edge_value);
 581
 582                if (parent)
 583                        parent = parent->next;
 584
 585                if (!parent)
 586                        edge_value = GRAPH_PARENT_NONE;
 587                else if (parent->next)
 588                        edge_value = GRAPH_EXTRA_EDGES_NEEDED | num_extra_edges;
 589                else {
 590                        edge_value = sha1_pos(parent->item->object.oid.hash,
 591                                              commits,
 592                                              nr_commits,
 593                                              commit_to_sha1);
 594                        if (edge_value < 0)
 595                                BUG("missing parent %s for commit %s",
 596                                    oid_to_hex(&parent->item->object.oid),
 597                                    oid_to_hex(&(*list)->object.oid));
 598                }
 599
 600                hashwrite_be32(f, edge_value);
 601
 602                if (edge_value & GRAPH_EXTRA_EDGES_NEEDED) {
 603                        do {
 604                                num_extra_edges++;
 605                                parent = parent->next;
 606                        } while (parent);
 607                }
 608
 609                if (sizeof((*list)->date) > 4)
 610                        packedDate[0] = htonl(((*list)->date >> 32) & 0x3);
 611                else
 612                        packedDate[0] = 0;
 613
 614                packedDate[0] |= htonl((*list)->generation << 2);
 615
 616                packedDate[1] = htonl((*list)->date);
 617                hashwrite(f, packedDate, 8);
 618
 619                list++;
 620        }
 621}
 622
 623static void write_graph_chunk_extra_edges(struct hashfile *f,
 624                                          struct commit **commits,
 625                                          int nr_commits,
 626                                          struct progress *progress,
 627                                          uint64_t *progress_cnt)
 628{
 629        struct commit **list = commits;
 630        struct commit **last = commits + nr_commits;
 631        struct commit_list *parent;
 632
 633        while (list < last) {
 634                int num_parents = 0;
 635
 636                display_progress(progress, ++*progress_cnt);
 637
 638                for (parent = (*list)->parents; num_parents < 3 && parent;
 639                     parent = parent->next)
 640                        num_parents++;
 641
 642                if (num_parents <= 2) {
 643                        list++;
 644                        continue;
 645                }
 646
 647                /* Since num_parents > 2, this initializer is safe. */
 648                for (parent = (*list)->parents->next; parent; parent = parent->next) {
 649                        int edge_value = sha1_pos(parent->item->object.oid.hash,
 650                                                  commits,
 651                                                  nr_commits,
 652                                                  commit_to_sha1);
 653
 654                        if (edge_value < 0)
 655                                BUG("missing parent %s for commit %s",
 656                                    oid_to_hex(&parent->item->object.oid),
 657                                    oid_to_hex(&(*list)->object.oid));
 658                        else if (!parent->next)
 659                                edge_value |= GRAPH_LAST_EDGE;
 660
 661                        hashwrite_be32(f, edge_value);
 662                }
 663
 664                list++;
 665        }
 666}
 667
 668static int commit_compare(const void *_a, const void *_b)
 669{
 670        const struct object_id *a = (const struct object_id *)_a;
 671        const struct object_id *b = (const struct object_id *)_b;
 672        return oidcmp(a, b);
 673}
 674
 675struct packed_commit_list {
 676        struct commit **list;
 677        int nr;
 678        int alloc;
 679};
 680
 681struct packed_oid_list {
 682        struct object_id *list;
 683        int nr;
 684        int alloc;
 685        struct progress *progress;
 686        int progress_done;
 687};
 688
 689static int add_packed_commits(const struct object_id *oid,
 690                              struct packed_git *pack,
 691                              uint32_t pos,
 692                              void *data)
 693{
 694        struct packed_oid_list *list = (struct packed_oid_list*)data;
 695        enum object_type type;
 696        off_t offset = nth_packed_object_offset(pack, pos);
 697        struct object_info oi = OBJECT_INFO_INIT;
 698
 699        if (list->progress)
 700                display_progress(list->progress, ++list->progress_done);
 701
 702        oi.typep = &type;
 703        if (packed_object_info(the_repository, pack, offset, &oi) < 0)
 704                die(_("unable to get type of object %s"), oid_to_hex(oid));
 705
 706        if (type != OBJ_COMMIT)
 707                return 0;
 708
 709        ALLOC_GROW(list->list, list->nr + 1, list->alloc);
 710        oidcpy(&(list->list[list->nr]), oid);
 711        list->nr++;
 712
 713        return 0;
 714}
 715
 716static void add_missing_parents(struct packed_oid_list *oids, struct commit *commit)
 717{
 718        struct commit_list *parent;
 719        for (parent = commit->parents; parent; parent = parent->next) {
 720                if (!(parent->item->object.flags & UNINTERESTING)) {
 721                        ALLOC_GROW(oids->list, oids->nr + 1, oids->alloc);
 722                        oidcpy(&oids->list[oids->nr], &(parent->item->object.oid));
 723                        oids->nr++;
 724                        parent->item->object.flags |= UNINTERESTING;
 725                }
 726        }
 727}
 728
 729static void close_reachable(struct packed_oid_list *oids, int report_progress)
 730{
 731        int i;
 732        struct commit *commit;
 733        struct progress *progress = NULL;
 734
 735        if (report_progress)
 736                progress = start_delayed_progress(
 737                        _("Loading known commits in commit graph"), oids->nr);
 738        for (i = 0; i < oids->nr; i++) {
 739                display_progress(progress, i + 1);
 740                commit = lookup_commit(the_repository, &oids->list[i]);
 741                if (commit)
 742                        commit->object.flags |= UNINTERESTING;
 743        }
 744        stop_progress(&progress);
 745
 746        /*
 747         * As this loop runs, oids->nr may grow, but not more
 748         * than the number of missing commits in the reachable
 749         * closure.
 750         */
 751        if (report_progress)
 752                progress = start_delayed_progress(
 753                        _("Expanding reachable commits in commit graph"), oids->nr);
 754        for (i = 0; i < oids->nr; i++) {
 755                display_progress(progress, i + 1);
 756                commit = lookup_commit(the_repository, &oids->list[i]);
 757
 758                if (commit && !parse_commit(commit))
 759                        add_missing_parents(oids, commit);
 760        }
 761        stop_progress(&progress);
 762
 763        if (report_progress)
 764                progress = start_delayed_progress(
 765                        _("Clearing commit marks in commit graph"), oids->nr);
 766        for (i = 0; i < oids->nr; i++) {
 767                display_progress(progress, i + 1);
 768                commit = lookup_commit(the_repository, &oids->list[i]);
 769
 770                if (commit)
 771                        commit->object.flags &= ~UNINTERESTING;
 772        }
 773        stop_progress(&progress);
 774}
 775
 776static void compute_generation_numbers(struct packed_commit_list* commits,
 777                                       int report_progress)
 778{
 779        int i;
 780        struct commit_list *list = NULL;
 781        struct progress *progress = NULL;
 782
 783        if (report_progress)
 784                progress = start_progress(
 785                        _("Computing commit graph generation numbers"),
 786                        commits->nr);
 787        for (i = 0; i < commits->nr; i++) {
 788                display_progress(progress, i + 1);
 789                if (commits->list[i]->generation != GENERATION_NUMBER_INFINITY &&
 790                    commits->list[i]->generation != GENERATION_NUMBER_ZERO)
 791                        continue;
 792
 793                commit_list_insert(commits->list[i], &list);
 794                while (list) {
 795                        struct commit *current = list->item;
 796                        struct commit_list *parent;
 797                        int all_parents_computed = 1;
 798                        uint32_t max_generation = 0;
 799
 800                        for (parent = current->parents; parent; parent = parent->next) {
 801                                if (parent->item->generation == GENERATION_NUMBER_INFINITY ||
 802                                    parent->item->generation == GENERATION_NUMBER_ZERO) {
 803                                        all_parents_computed = 0;
 804                                        commit_list_insert(parent->item, &list);
 805                                        break;
 806                                } else if (parent->item->generation > max_generation) {
 807                                        max_generation = parent->item->generation;
 808                                }
 809                        }
 810
 811                        if (all_parents_computed) {
 812                                current->generation = max_generation + 1;
 813                                pop_commit(&list);
 814
 815                                if (current->generation > GENERATION_NUMBER_MAX)
 816                                        current->generation = GENERATION_NUMBER_MAX;
 817                        }
 818                }
 819        }
 820        stop_progress(&progress);
 821}
 822
 823static int add_ref_to_list(const char *refname,
 824                           const struct object_id *oid,
 825                           int flags, void *cb_data)
 826{
 827        struct string_list *list = (struct string_list *)cb_data;
 828
 829        string_list_append(list, oid_to_hex(oid));
 830        return 0;
 831}
 832
 833void write_commit_graph_reachable(const char *obj_dir, int append,
 834                                  int report_progress)
 835{
 836        struct string_list list = STRING_LIST_INIT_DUP;
 837
 838        for_each_ref(add_ref_to_list, &list);
 839        write_commit_graph(obj_dir, NULL, &list, append, report_progress);
 840
 841        string_list_clear(&list, 0);
 842}
 843
 844void write_commit_graph(const char *obj_dir,
 845                        struct string_list *pack_indexes,
 846                        struct string_list *commit_hex,
 847                        int append, int report_progress)
 848{
 849        struct packed_oid_list oids;
 850        struct packed_commit_list commits;
 851        struct hashfile *f;
 852        uint32_t i, count_distinct = 0;
 853        char *graph_name;
 854        struct lock_file lk = LOCK_INIT;
 855        uint32_t chunk_ids[5];
 856        uint64_t chunk_offsets[5];
 857        int num_chunks;
 858        int num_extra_edges;
 859        struct commit_list *parent;
 860        struct progress *progress = NULL;
 861        const unsigned hashsz = the_hash_algo->rawsz;
 862        uint64_t progress_cnt = 0;
 863        struct strbuf progress_title = STRBUF_INIT;
 864        unsigned long approx_nr_objects;
 865
 866        if (!commit_graph_compatible(the_repository))
 867                return;
 868
 869        oids.nr = 0;
 870        approx_nr_objects = approximate_object_count();
 871        oids.alloc = approx_nr_objects / 32;
 872        oids.progress = NULL;
 873        oids.progress_done = 0;
 874
 875        if (append) {
 876                prepare_commit_graph_one(the_repository, obj_dir);
 877                if (the_repository->objects->commit_graph)
 878                        oids.alloc += the_repository->objects->commit_graph->num_commits;
 879        }
 880
 881        if (oids.alloc < 1024)
 882                oids.alloc = 1024;
 883        ALLOC_ARRAY(oids.list, oids.alloc);
 884
 885        if (append && the_repository->objects->commit_graph) {
 886                struct commit_graph *commit_graph =
 887                        the_repository->objects->commit_graph;
 888                for (i = 0; i < commit_graph->num_commits; i++) {
 889                        const unsigned char *hash = commit_graph->chunk_oid_lookup +
 890                                commit_graph->hash_len * i;
 891                        hashcpy(oids.list[oids.nr++].hash, hash);
 892                }
 893        }
 894
 895        if (pack_indexes) {
 896                struct strbuf packname = STRBUF_INIT;
 897                int dirlen;
 898                strbuf_addf(&packname, "%s/pack/", obj_dir);
 899                dirlen = packname.len;
 900                if (report_progress) {
 901                        strbuf_addf(&progress_title,
 902                                    Q_("Finding commits for commit graph in %d pack",
 903                                       "Finding commits for commit graph in %d packs",
 904                                       pack_indexes->nr),
 905                                    pack_indexes->nr);
 906                        oids.progress = start_delayed_progress(progress_title.buf, 0);
 907                        oids.progress_done = 0;
 908                }
 909                for (i = 0; i < pack_indexes->nr; i++) {
 910                        struct packed_git *p;
 911                        strbuf_setlen(&packname, dirlen);
 912                        strbuf_addstr(&packname, pack_indexes->items[i].string);
 913                        p = add_packed_git(packname.buf, packname.len, 1);
 914                        if (!p)
 915                                die(_("error adding pack %s"), packname.buf);
 916                        if (open_pack_index(p))
 917                                die(_("error opening index for %s"), packname.buf);
 918                        for_each_object_in_pack(p, add_packed_commits, &oids,
 919                                                FOR_EACH_OBJECT_PACK_ORDER);
 920                        close_pack(p);
 921                        free(p);
 922                }
 923                stop_progress(&oids.progress);
 924                strbuf_reset(&progress_title);
 925                strbuf_release(&packname);
 926        }
 927
 928        if (commit_hex) {
 929                if (report_progress) {
 930                        strbuf_addf(&progress_title,
 931                                    Q_("Finding commits for commit graph from %d ref",
 932                                       "Finding commits for commit graph from %d refs",
 933                                       commit_hex->nr),
 934                                    commit_hex->nr);
 935                        progress = start_delayed_progress(progress_title.buf,
 936                                                          commit_hex->nr);
 937                }
 938                for (i = 0; i < commit_hex->nr; i++) {
 939                        const char *end;
 940                        struct object_id oid;
 941                        struct commit *result;
 942
 943                        display_progress(progress, i + 1);
 944                        if (commit_hex->items[i].string &&
 945                            parse_oid_hex(commit_hex->items[i].string, &oid, &end))
 946                                continue;
 947
 948                        result = lookup_commit_reference_gently(the_repository, &oid, 1);
 949
 950                        if (result) {
 951                                ALLOC_GROW(oids.list, oids.nr + 1, oids.alloc);
 952                                oidcpy(&oids.list[oids.nr], &(result->object.oid));
 953                                oids.nr++;
 954                        }
 955                }
 956                stop_progress(&progress);
 957                strbuf_reset(&progress_title);
 958        }
 959
 960        if (!pack_indexes && !commit_hex) {
 961                if (report_progress)
 962                        oids.progress = start_delayed_progress(
 963                                _("Finding commits for commit graph among packed objects"),
 964                                approx_nr_objects);
 965                for_each_packed_object(add_packed_commits, &oids,
 966                                       FOR_EACH_OBJECT_PACK_ORDER);
 967                if (oids.progress_done < approx_nr_objects)
 968                        display_progress(oids.progress, approx_nr_objects);
 969                stop_progress(&oids.progress);
 970        }
 971
 972        close_reachable(&oids, report_progress);
 973
 974        if (report_progress)
 975                progress = start_delayed_progress(
 976                        _("Counting distinct commits in commit graph"),
 977                        oids.nr);
 978        display_progress(progress, 0); /* TODO: Measure QSORT() progress */
 979        QSORT(oids.list, oids.nr, commit_compare);
 980        count_distinct = 1;
 981        for (i = 1; i < oids.nr; i++) {
 982                display_progress(progress, i + 1);
 983                if (!oideq(&oids.list[i - 1], &oids.list[i]))
 984                        count_distinct++;
 985        }
 986        stop_progress(&progress);
 987
 988        if (count_distinct >= GRAPH_EDGE_LAST_MASK)
 989                die(_("the commit graph format cannot write %d commits"), count_distinct);
 990
 991        commits.nr = 0;
 992        commits.alloc = count_distinct;
 993        ALLOC_ARRAY(commits.list, commits.alloc);
 994
 995        num_extra_edges = 0;
 996        if (report_progress)
 997                progress = start_delayed_progress(
 998                        _("Finding extra edges in commit graph"),
 999                        oids.nr);
1000        for (i = 0; i < oids.nr; i++) {
1001                int num_parents = 0;
1002                display_progress(progress, i + 1);
1003                if (i > 0 && oideq(&oids.list[i - 1], &oids.list[i]))
1004                        continue;
1005
1006                commits.list[commits.nr] = lookup_commit(the_repository, &oids.list[i]);
1007                parse_commit(commits.list[commits.nr]);
1008
1009                for (parent = commits.list[commits.nr]->parents;
1010                     parent; parent = parent->next)
1011                        num_parents++;
1012
1013                if (num_parents > 2)
1014                        num_extra_edges += num_parents - 1;
1015
1016                commits.nr++;
1017        }
1018        num_chunks = num_extra_edges ? 4 : 3;
1019        stop_progress(&progress);
1020
1021        if (commits.nr >= GRAPH_EDGE_LAST_MASK)
1022                die(_("too many commits to write graph"));
1023
1024        compute_generation_numbers(&commits, report_progress);
1025
1026        graph_name = get_commit_graph_filename(obj_dir);
1027        if (safe_create_leading_directories(graph_name)) {
1028                UNLEAK(graph_name);
1029                die_errno(_("unable to create leading directories of %s"),
1030                          graph_name);
1031        }
1032
1033        hold_lock_file_for_update(&lk, graph_name, LOCK_DIE_ON_ERROR);
1034        f = hashfd(lk.tempfile->fd, lk.tempfile->filename.buf);
1035
1036        hashwrite_be32(f, GRAPH_SIGNATURE);
1037
1038        hashwrite_u8(f, GRAPH_VERSION);
1039        hashwrite_u8(f, oid_version());
1040        hashwrite_u8(f, num_chunks);
1041        hashwrite_u8(f, 0); /* unused padding byte */
1042
1043        chunk_ids[0] = GRAPH_CHUNKID_OIDFANOUT;
1044        chunk_ids[1] = GRAPH_CHUNKID_OIDLOOKUP;
1045        chunk_ids[2] = GRAPH_CHUNKID_DATA;
1046        if (num_extra_edges)
1047                chunk_ids[3] = GRAPH_CHUNKID_EXTRAEDGES;
1048        else
1049                chunk_ids[3] = 0;
1050        chunk_ids[4] = 0;
1051
1052        chunk_offsets[0] = 8 + (num_chunks + 1) * GRAPH_CHUNKLOOKUP_WIDTH;
1053        chunk_offsets[1] = chunk_offsets[0] + GRAPH_FANOUT_SIZE;
1054        chunk_offsets[2] = chunk_offsets[1] + hashsz * commits.nr;
1055        chunk_offsets[3] = chunk_offsets[2] + (hashsz + 16) * commits.nr;
1056        chunk_offsets[4] = chunk_offsets[3] + 4 * num_extra_edges;
1057
1058        for (i = 0; i <= num_chunks; i++) {
1059                uint32_t chunk_write[3];
1060
1061                chunk_write[0] = htonl(chunk_ids[i]);
1062                chunk_write[1] = htonl(chunk_offsets[i] >> 32);
1063                chunk_write[2] = htonl(chunk_offsets[i] & 0xffffffff);
1064                hashwrite(f, chunk_write, 12);
1065        }
1066
1067        if (report_progress) {
1068                strbuf_addf(&progress_title,
1069                            Q_("Writing out commit graph in %d pass",
1070                               "Writing out commit graph in %d passes",
1071                               num_chunks),
1072                            num_chunks);
1073                progress = start_delayed_progress(
1074                        progress_title.buf,
1075                        num_chunks * commits.nr);
1076        }
1077        write_graph_chunk_fanout(f, commits.list, commits.nr, progress, &progress_cnt);
1078        write_graph_chunk_oids(f, hashsz, commits.list, commits.nr, progress, &progress_cnt);
1079        write_graph_chunk_data(f, hashsz, commits.list, commits.nr, progress, &progress_cnt);
1080        if (num_extra_edges)
1081                write_graph_chunk_extra_edges(f, commits.list, commits.nr, progress, &progress_cnt);
1082        stop_progress(&progress);
1083        strbuf_release(&progress_title);
1084
1085        close_commit_graph(the_repository);
1086        finalize_hashfile(f, NULL, CSUM_HASH_IN_STREAM | CSUM_FSYNC);
1087        commit_lock_file(&lk);
1088
1089        free(graph_name);
1090        free(commits.list);
1091        free(oids.list);
1092}
1093
1094#define VERIFY_COMMIT_GRAPH_ERROR_HASH 2
1095static int verify_commit_graph_error;
1096
1097static void graph_report(const char *fmt, ...)
1098{
1099        va_list ap;
1100
1101        verify_commit_graph_error = 1;
1102        va_start(ap, fmt);
1103        vfprintf(stderr, fmt, ap);
1104        fprintf(stderr, "\n");
1105        va_end(ap);
1106}
1107
1108#define GENERATION_ZERO_EXISTS 1
1109#define GENERATION_NUMBER_EXISTS 2
1110
1111int verify_commit_graph(struct repository *r, struct commit_graph *g)
1112{
1113        uint32_t i, cur_fanout_pos = 0;
1114        struct object_id prev_oid, cur_oid, checksum;
1115        int generation_zero = 0;
1116        struct hashfile *f;
1117        int devnull;
1118        struct progress *progress = NULL;
1119
1120        if (!g) {
1121                graph_report("no commit-graph file loaded");
1122                return 1;
1123        }
1124
1125        verify_commit_graph_error = verify_commit_graph_lite(g);
1126        if (verify_commit_graph_error)
1127                return verify_commit_graph_error;
1128
1129        devnull = open("/dev/null", O_WRONLY);
1130        f = hashfd(devnull, NULL);
1131        hashwrite(f, g->data, g->data_len - g->hash_len);
1132        finalize_hashfile(f, checksum.hash, CSUM_CLOSE);
1133        if (!hasheq(checksum.hash, g->data + g->data_len - g->hash_len)) {
1134                graph_report(_("the commit-graph file has incorrect checksum and is likely corrupt"));
1135                verify_commit_graph_error = VERIFY_COMMIT_GRAPH_ERROR_HASH;
1136        }
1137
1138        for (i = 0; i < g->num_commits; i++) {
1139                struct commit *graph_commit;
1140
1141                hashcpy(cur_oid.hash, g->chunk_oid_lookup + g->hash_len * i);
1142
1143                if (i && oidcmp(&prev_oid, &cur_oid) >= 0)
1144                        graph_report("commit-graph has incorrect OID order: %s then %s",
1145                                     oid_to_hex(&prev_oid),
1146                                     oid_to_hex(&cur_oid));
1147
1148                oidcpy(&prev_oid, &cur_oid);
1149
1150                while (cur_oid.hash[0] > cur_fanout_pos) {
1151                        uint32_t fanout_value = get_be32(g->chunk_oid_fanout + cur_fanout_pos);
1152
1153                        if (i != fanout_value)
1154                                graph_report("commit-graph has incorrect fanout value: fanout[%d] = %u != %u",
1155                                             cur_fanout_pos, fanout_value, i);
1156                        cur_fanout_pos++;
1157                }
1158
1159                graph_commit = lookup_commit(r, &cur_oid);
1160                if (!parse_commit_in_graph_one(r, g, graph_commit))
1161                        graph_report("failed to parse %s from commit-graph",
1162                                     oid_to_hex(&cur_oid));
1163        }
1164
1165        while (cur_fanout_pos < 256) {
1166                uint32_t fanout_value = get_be32(g->chunk_oid_fanout + cur_fanout_pos);
1167
1168                if (g->num_commits != fanout_value)
1169                        graph_report("commit-graph has incorrect fanout value: fanout[%d] = %u != %u",
1170                                     cur_fanout_pos, fanout_value, i);
1171
1172                cur_fanout_pos++;
1173        }
1174
1175        if (verify_commit_graph_error & ~VERIFY_COMMIT_GRAPH_ERROR_HASH)
1176                return verify_commit_graph_error;
1177
1178        progress = start_progress(_("Verifying commits in commit graph"),
1179                                  g->num_commits);
1180        for (i = 0; i < g->num_commits; i++) {
1181                struct commit *graph_commit, *odb_commit;
1182                struct commit_list *graph_parents, *odb_parents;
1183                uint32_t max_generation = 0;
1184
1185                display_progress(progress, i + 1);
1186                hashcpy(cur_oid.hash, g->chunk_oid_lookup + g->hash_len * i);
1187
1188                graph_commit = lookup_commit(r, &cur_oid);
1189                odb_commit = (struct commit *)create_object(r, cur_oid.hash, alloc_commit_node(r));
1190                if (parse_commit_internal(odb_commit, 0, 0)) {
1191                        graph_report("failed to parse %s from object database",
1192                                     oid_to_hex(&cur_oid));
1193                        continue;
1194                }
1195
1196                if (!oideq(&get_commit_tree_in_graph_one(r, g, graph_commit)->object.oid,
1197                           get_commit_tree_oid(odb_commit)))
1198                        graph_report("root tree OID for commit %s in commit-graph is %s != %s",
1199                                     oid_to_hex(&cur_oid),
1200                                     oid_to_hex(get_commit_tree_oid(graph_commit)),
1201                                     oid_to_hex(get_commit_tree_oid(odb_commit)));
1202
1203                graph_parents = graph_commit->parents;
1204                odb_parents = odb_commit->parents;
1205
1206                while (graph_parents) {
1207                        if (odb_parents == NULL) {
1208                                graph_report("commit-graph parent list for commit %s is too long",
1209                                             oid_to_hex(&cur_oid));
1210                                break;
1211                        }
1212
1213                        if (!oideq(&graph_parents->item->object.oid, &odb_parents->item->object.oid))
1214                                graph_report("commit-graph parent for %s is %s != %s",
1215                                             oid_to_hex(&cur_oid),
1216                                             oid_to_hex(&graph_parents->item->object.oid),
1217                                             oid_to_hex(&odb_parents->item->object.oid));
1218
1219                        if (graph_parents->item->generation > max_generation)
1220                                max_generation = graph_parents->item->generation;
1221
1222                        graph_parents = graph_parents->next;
1223                        odb_parents = odb_parents->next;
1224                }
1225
1226                if (odb_parents != NULL)
1227                        graph_report("commit-graph parent list for commit %s terminates early",
1228                                     oid_to_hex(&cur_oid));
1229
1230                if (!graph_commit->generation) {
1231                        if (generation_zero == GENERATION_NUMBER_EXISTS)
1232                                graph_report("commit-graph has generation number zero for commit %s, but non-zero elsewhere",
1233                                             oid_to_hex(&cur_oid));
1234                        generation_zero = GENERATION_ZERO_EXISTS;
1235                } else if (generation_zero == GENERATION_ZERO_EXISTS)
1236                        graph_report("commit-graph has non-zero generation number for commit %s, but zero elsewhere",
1237                                     oid_to_hex(&cur_oid));
1238
1239                if (generation_zero == GENERATION_ZERO_EXISTS)
1240                        continue;
1241
1242                /*
1243                 * If one of our parents has generation GENERATION_NUMBER_MAX, then
1244                 * our generation is also GENERATION_NUMBER_MAX. Decrement to avoid
1245                 * extra logic in the following condition.
1246                 */
1247                if (max_generation == GENERATION_NUMBER_MAX)
1248                        max_generation--;
1249
1250                if (graph_commit->generation != max_generation + 1)
1251                        graph_report("commit-graph generation for commit %s is %u != %u",
1252                                     oid_to_hex(&cur_oid),
1253                                     graph_commit->generation,
1254                                     max_generation + 1);
1255
1256                if (graph_commit->date != odb_commit->date)
1257                        graph_report("commit date for commit %s in commit-graph is %"PRItime" != %"PRItime,
1258                                     oid_to_hex(&cur_oid),
1259                                     graph_commit->date,
1260                                     odb_commit->date);
1261        }
1262        stop_progress(&progress);
1263
1264        return verify_commit_graph_error;
1265}
1266
1267void free_commit_graph(struct commit_graph *g)
1268{
1269        if (!g)
1270                return;
1271        if (g->graph_fd >= 0) {
1272                munmap((void *)g->data, g->data_len);
1273                g->data = NULL;
1274                close(g->graph_fd);
1275        }
1276        free(g);
1277}