epoch.con commit [PATCH] Generic support for pulling refs (cd541a6)
   1/*
   2 * Copyright (c) 2005, Jon Seymour
   3 *
   4 * For more information about epoch theory on which this module is based,
   5 * refer to http://blackcubes.dyndns.org/epoch/. That web page defines
   6 * terms such as "epoch" and "minimal, non-linear epoch" and provides rationales
   7 * for some of the algorithms used here.
   8 *
   9 */
  10#include <stdlib.h>
  11#include <openssl/bn.h>         // provides arbitrary precision integers
  12                                // required to accurately represent fractional 
  13                                //mass
  14
  15#include "cache.h"
  16#include "commit.h"
  17#include "epoch.h"
  18
  19struct fraction {
  20        BIGNUM numerator;
  21        BIGNUM denominator;
  22};
  23
  24#define HAS_EXACTLY_ONE_PARENT(n) ((n)->parents && !(n)->parents->next)
  25
  26static BN_CTX *context = NULL;
  27static struct fraction *one = NULL;
  28static struct fraction *zero = NULL;
  29
  30static BN_CTX *get_BN_CTX()
  31{
  32        if (!context) {
  33                context = BN_CTX_new();
  34        }
  35        return context;
  36}
  37
  38static struct fraction *new_zero()
  39{
  40        struct fraction *result = xmalloc(sizeof(*result));
  41        BN_init(&result->numerator);
  42        BN_init(&result->denominator);
  43        BN_zero(&result->numerator);
  44        BN_one(&result->denominator);
  45        return result;
  46}
  47
  48static void clear_fraction(struct fraction *fraction)
  49{
  50        BN_clear(&fraction->numerator);
  51        BN_clear(&fraction->denominator);
  52}
  53
  54static struct fraction *divide(struct fraction *result, struct fraction *fraction, int divisor)
  55{
  56        BIGNUM bn_divisor;
  57
  58        BN_init(&bn_divisor);
  59        BN_set_word(&bn_divisor, divisor);
  60
  61        BN_copy(&result->numerator, &fraction->numerator);
  62        BN_mul(&result->denominator, &fraction->denominator, &bn_divisor, get_BN_CTX());
  63
  64        BN_clear(&bn_divisor);
  65        return result;
  66}
  67
  68static struct fraction *init_fraction(struct fraction *fraction)
  69{
  70        BN_init(&fraction->numerator);
  71        BN_init(&fraction->denominator);
  72        BN_zero(&fraction->numerator);
  73        BN_one(&fraction->denominator);
  74        return fraction;
  75}
  76
  77static struct fraction *get_one()
  78{
  79        if (!one) {
  80                one = new_zero();
  81                BN_one(&one->numerator);
  82        }
  83        return one;
  84}
  85
  86static struct fraction *get_zero()
  87{
  88        if (!zero) {
  89                zero = new_zero();
  90        }
  91        return zero;
  92}
  93
  94static struct fraction *copy(struct fraction *to, struct fraction *from)
  95{
  96        BN_copy(&to->numerator, &from->numerator);
  97        BN_copy(&to->denominator, &from->denominator);
  98        return to;
  99}
 100
 101static struct fraction *add(struct fraction *result, struct fraction *left, struct fraction *right)
 102{
 103        BIGNUM a, b, gcd;
 104
 105        BN_init(&a);
 106        BN_init(&b);
 107        BN_init(&gcd);
 108
 109        BN_mul(&a, &left->numerator, &right->denominator, get_BN_CTX());
 110        BN_mul(&b, &left->denominator, &right->numerator, get_BN_CTX());
 111        BN_mul(&result->denominator, &left->denominator, &right->denominator, get_BN_CTX());
 112        BN_add(&result->numerator, &a, &b);
 113
 114        BN_gcd(&gcd, &result->denominator, &result->numerator, get_BN_CTX());
 115        BN_div(&result->denominator, NULL, &result->denominator, &gcd, get_BN_CTX());
 116        BN_div(&result->numerator, NULL, &result->numerator, &gcd, get_BN_CTX());
 117
 118        BN_clear(&a);
 119        BN_clear(&b);
 120        BN_clear(&gcd);
 121
 122        return result;
 123}
 124
 125static int compare(struct fraction *left, struct fraction *right)
 126{
 127        BIGNUM a, b;
 128
 129        int result;
 130
 131        BN_init(&a);
 132        BN_init(&b);
 133
 134        BN_mul(&a, &left->numerator, &right->denominator, get_BN_CTX());
 135        BN_mul(&b, &left->denominator, &right->numerator, get_BN_CTX());
 136
 137        result = BN_cmp(&a, &b);
 138
 139        BN_clear(&a);
 140        BN_clear(&b);
 141
 142        return result;
 143}
 144
 145struct mass_counter {
 146        struct fraction seen;
 147        struct fraction pending;
 148};
 149
 150static struct mass_counter *new_mass_counter(struct commit *commit, struct fraction *pending)
 151{
 152        struct mass_counter *mass_counter = xmalloc(sizeof(*mass_counter));
 153        memset(mass_counter, 0, sizeof(*mass_counter));
 154
 155        init_fraction(&mass_counter->seen);
 156        init_fraction(&mass_counter->pending);
 157
 158        copy(&mass_counter->pending, pending);
 159        copy(&mass_counter->seen, get_zero());
 160
 161        if (commit->object.util) {
 162                die("multiple attempts to initialize mass counter for %s\n", sha1_to_hex(commit->object.sha1));
 163        }
 164
 165        commit->object.util = mass_counter;
 166
 167        return mass_counter;
 168}
 169
 170static void free_mass_counter(struct mass_counter *counter)
 171{
 172        clear_fraction(&counter->seen);
 173        clear_fraction(&counter->pending);
 174        free(counter);
 175}
 176
 177//
 178// Finds the base commit of a list of commits.
 179//
 180// One property of the commit being searched for is that every commit reachable 
 181// from the base commit is reachable from the commits in the starting list only 
 182// via paths that include the base commit.
 183//
 184// This algorithm uses a conservation of mass approach to find the base commit.
 185//
 186// We start by injecting one unit of mass into the graph at each
 187// of the commits in the starting list. Injecting mass into a commit
 188// is achieved by adding to its pending mass counter and, if it is not already
 189// enqueued, enqueuing the commit in a list of pending commits, in latest 
 190// commit date first order.
 191//
 192// The algorithm then preceeds to visit each commit in the pending queue.
 193// Upon each visit, the pending mass is added to the mass already seen for that 
 194// commit and then divided into N equal portions, where N is the number of 
 195// parents of the commit being visited. The divided portions are then injected 
 196// into each of the parents.
 197//
 198// The algorithm continues until we discover a commit which has seen all the
 199// mass originally injected or until we run out of things to do. 
 200//
 201// If we find a commit that has seen all the original mass, we have found
 202// the common base of all the commits in the starting list. 
 203//
 204// The algorithm does _not_ depend on accurate timestamps for correct operation.
 205// However, reasonably sane (e.g. non-random) timestamps are required in order 
 206// to prevent an exponential performance characteristic. The occasional 
 207// timestamp inaccuracy will not dramatically affect performance but may 
 208// result in more nodes being processed than strictly necessary.
 209//
 210// This procedure sets *boundary to the address of the base commit. It returns 
 211// non-zero if, and only if, there was a problem parsing one of the 
 212// commits discovered during the traversal.
 213//
 214static int find_base_for_list(struct commit_list *list, struct commit **boundary)
 215{
 216
 217        int ret = 0;
 218
 219        struct commit_list *cleaner = NULL;
 220        struct commit_list *pending = NULL;
 221
 222        *boundary = NULL;
 223
 224        struct fraction injected;
 225
 226        init_fraction(&injected);
 227
 228        for (; list; list = list->next) {
 229
 230                struct commit *item = list->item;
 231
 232                if (item->object.util || (item->object.flags & UNINTERESTING)) {
 233                        die("%s:%d:%s: logic error: this should not have happened - commit %s\n",
 234                            __FILE__, __LINE__, __FUNCTION__, sha1_to_hex(item->object.sha1));
 235                }
 236
 237                new_mass_counter(list->item, get_one());
 238                add(&injected, &injected, get_one());
 239
 240                commit_list_insert(list->item, &cleaner);
 241                commit_list_insert(list->item, &pending);
 242        }
 243
 244        while (!*boundary && pending && !ret) {
 245
 246                struct commit *latest = pop_commit(&pending);
 247
 248                struct mass_counter *latest_node = (struct mass_counter *) latest->object.util;
 249
 250                if ((ret = parse_commit(latest)))
 251                        continue;
 252
 253                add(&latest_node->seen, &latest_node->seen, &latest_node->pending);
 254
 255                int num_parents = count_parents(latest);
 256
 257                if (num_parents) {
 258
 259                        struct fraction distribution;
 260                        struct commit_list *parents;
 261
 262                        divide(init_fraction(&distribution), &latest_node->pending, num_parents);
 263
 264                        for (parents = latest->parents; parents; parents = parents->next) {
 265
 266                                struct commit *parent = parents->item;
 267                                struct mass_counter *parent_node = (struct mass_counter *) parent->object.util;
 268
 269                                if (!parent_node) {
 270
 271                                        parent_node = new_mass_counter(parent, &distribution);
 272
 273                                        insert_by_date(&pending, parent);
 274                                        commit_list_insert(parent, &cleaner);
 275
 276                                } else {
 277
 278                                        if (!compare(&parent_node->pending, get_zero())) {
 279                                                insert_by_date(&pending, parent);
 280                                        }
 281                                        add(&parent_node->pending, &parent_node->pending, &distribution);
 282
 283                                }
 284                        }
 285
 286                        clear_fraction(&distribution);
 287
 288                }
 289
 290                if (!compare(&latest_node->seen, &injected)) {
 291                        *boundary = latest;
 292                }
 293
 294                copy(&latest_node->pending, get_zero());
 295
 296        }
 297
 298        while (cleaner) {
 299
 300                struct commit *next = pop_commit(&cleaner);
 301                free_mass_counter((struct mass_counter *) next->object.util);
 302                next->object.util = NULL;
 303
 304        }
 305
 306        if (pending)
 307                free_commit_list(pending);
 308
 309        clear_fraction(&injected);
 310
 311        return ret;
 312
 313}
 314
 315
 316//
 317// Finds the base of an minimal, non-linear epoch, headed at head, by
 318// applying the find_base_for_list to a list consisting of the parents
 319//
 320static int find_base(struct commit *head, struct commit **boundary)
 321{
 322        int ret = 0;
 323        struct commit_list *pending = NULL;
 324        struct commit_list *next;
 325
 326        commit_list_insert(head, &pending);
 327        for (next = head->parents; next; next = next->next) {
 328                commit_list_insert(next->item, &pending);
 329        }
 330        ret = find_base_for_list(pending, boundary);
 331        free_commit_list(pending);
 332
 333        return ret;
 334}
 335
 336//
 337// This procedure traverses to the boundary of the first epoch in the epoch
 338// sequence of the epoch headed at head_of_epoch. This is either the end of
 339// the maximal linear epoch or the base of a minimal non-linear epoch.
 340//
 341// The queue of pending nodes is sorted in reverse date order and each node
 342// is currently in the queue at most once.
 343//
 344static int find_next_epoch_boundary(struct commit *head_of_epoch, struct commit **boundary)
 345{
 346        int ret;
 347        struct commit *item = head_of_epoch;
 348
 349        ret = parse_commit(item);
 350        if (ret)
 351                return ret;
 352
 353        if (HAS_EXACTLY_ONE_PARENT(item)) {
 354
 355                // we are at the start of a maximimal linear epoch .. traverse to the end
 356
 357                // traverse to the end of a maximal linear epoch
 358                while (HAS_EXACTLY_ONE_PARENT(item) && !ret) {
 359                        item = item->parents->item;
 360                        ret = parse_commit(item);
 361                }
 362                *boundary = item;
 363
 364        } else {
 365
 366                // otherwise, we are at the start of a minimal, non-linear
 367                // epoch - find the common base of all parents.
 368
 369                ret = find_base(item, boundary);
 370
 371        }
 372
 373        return ret;
 374}
 375
 376//
 377// Returns non-zero if parent is known to be a parent of child.
 378//
 379static int is_parent_of(struct commit *parent, struct commit *child)
 380{
 381        struct commit_list *parents;
 382        for (parents = child->parents; parents; parents = parents->next) {
 383                if (!memcmp(parent->object.sha1, parents->item->object.sha1, sizeof(parents->item->object.sha1)))
 384                        return 1;
 385        }
 386        return 0;
 387}
 388
 389//
 390// Pushes an item onto the merge order stack. If the top of the stack is
 391// marked as being a possible "break", we check to see whether it actually
 392// is a break.
 393//
 394static void push_onto_merge_order_stack(struct commit_list **stack, struct commit *item)
 395{
 396        struct commit_list *top = *stack;
 397        if (top && (top->item->object.flags & DISCONTINUITY)) {
 398                if (is_parent_of(top->item, item)) {
 399                        top->item->object.flags &= ~DISCONTINUITY;
 400                }
 401        }
 402        commit_list_insert(item, stack);
 403}
 404
 405//
 406// Marks all interesting, visited commits reachable from this commit
 407// as uninteresting. We stop recursing when we reach the epoch boundary,
 408// an unvisited node or a node that has already been marking uninteresting.
 409// This doesn't actually mark all ancestors between the start node and the
 410// epoch boundary uninteresting, but does ensure that they will 
 411// eventually be marked uninteresting when the main sort_first_epoch 
 412// traversal eventually reaches them. 
 413//
 414static void mark_ancestors_uninteresting(struct commit *commit)
 415{
 416        unsigned int flags = commit->object.flags;
 417        int visited = flags & VISITED;
 418        int boundary = flags & BOUNDARY;
 419        int uninteresting = flags & UNINTERESTING;
 420
 421        if (uninteresting || boundary || !visited) {
 422                commit->object.flags |= UNINTERESTING;
 423                return;
 424
 425                // we only need to recurse if
 426                //      we are not on the boundary, and,
 427                //      we have not already been marked uninteresting, and,
 428                //      we have already been visited.
 429
 430                //
 431                // the main sort_first_epoch traverse will 
 432                // mark unreachable all uninteresting, unvisited parents 
 433                // as they are visited so there is no need to duplicate
 434                // that traversal here.
 435                //
 436                // similarly, if we are already marked uninteresting
 437                // then either all ancestors have already been marked
 438                // uninteresting or will be once the sort_first_epoch
 439                // traverse reaches them.
 440                //
 441        }
 442
 443        struct commit_list *next;
 444
 445        for (next = commit->parents; next; next = next->next)
 446                mark_ancestors_uninteresting(next->item);
 447}
 448
 449//
 450// Sorts the nodes of the first epoch of the epoch sequence of the epoch headed at head
 451// into merge order.
 452//
 453static void sort_first_epoch(struct commit *head, struct commit_list **stack)
 454{
 455        struct commit_list *parents;
 456        struct commit_list *reversed_parents = NULL;
 457
 458        head->object.flags |= VISITED;
 459
 460        //
 461        // parse_commit builds the parent list in reverse order with respect to the order of
 462        // the git-commit-tree arguments.
 463        //
 464        // so we need to reverse this list to output the oldest (or most "local") commits last.
 465        //
 466
 467        for (parents = head->parents; parents; parents = parents->next)
 468                commit_list_insert(parents->item, &reversed_parents);
 469
 470        //
 471        // todo: by sorting the parents in a different order, we can alter the 
 472        // merge order to show contemporaneous changes in parallel branches
 473        // occurring after "local" changes. This is useful for a developer
 474        // when a developer wants to see all changes that were incorporated
 475        // into the same merge as her own changes occur after her own
 476        // changes.
 477        //
 478
 479        while (reversed_parents) {
 480
 481                struct commit *parent = pop_commit(&reversed_parents);
 482
 483                if (head->object.flags & UNINTERESTING) {
 484                        // propagates the uninteresting bit to
 485                        // all parents. if we have already visited
 486                        // this parent, then the uninteresting bit
 487                        // will be propagated to each reachable 
 488                        // commit that is still not marked uninteresting
 489                        // and won't otherwise be reached.
 490                        mark_ancestors_uninteresting(parent);
 491                }
 492
 493                if (!(parent->object.flags & VISITED)) {
 494                        if (parent->object.flags & BOUNDARY) {
 495
 496                                if (*stack) {
 497                                        die("something else is on the stack - %s\n", sha1_to_hex((*stack)->item->object.sha1));
 498                                }
 499
 500                                push_onto_merge_order_stack(stack, parent);
 501                                parent->object.flags |= VISITED;
 502
 503                        } else {
 504
 505                                sort_first_epoch(parent, stack);
 506
 507                                if (reversed_parents) {
 508                                        //
 509                                        // this indicates a possible discontinuity
 510                                        // it may not be be actual discontinuity if
 511                                        // the head of parent N happens to be the tail
 512                                        // of parent N+1
 513                                        //
 514                                        // the next push onto the stack will resolve the 
 515                                        // question
 516                                        //
 517                                        (*stack)->item->object.flags |= DISCONTINUITY;
 518                                }
 519                        }
 520                }
 521        }
 522
 523        push_onto_merge_order_stack(stack, head);
 524}
 525
 526//
 527// Emit the contents of the stack. 
 528//
 529// The stack is freed and replaced by NULL.
 530//
 531// Sets the return value to STOP if no further output should be generated.
 532//
 533static int emit_stack(struct commit_list **stack, emitter_func emitter)
 534{
 535        unsigned int seen = 0;
 536        int action = CONTINUE;
 537
 538        while (*stack && (action != STOP)) {
 539
 540                struct commit *next = pop_commit(stack);
 541
 542                seen |= next->object.flags;
 543
 544                if (*stack) {
 545                        action = (*emitter) (next);
 546                }
 547        }
 548
 549        if (*stack) {
 550                free_commit_list(*stack);
 551                *stack = NULL;
 552        }
 553
 554        return (action == STOP || (seen & UNINTERESTING)) ? STOP : CONTINUE;
 555}
 556
 557//
 558// Sorts an arbitrary epoch into merge order by sorting each epoch
 559// of its epoch sequence into order.
 560//
 561// Note: this algorithm currently leaves traces of its execution in the
 562// object flags of nodes it discovers. This should probably be fixed.
 563//
 564static int sort_in_merge_order(struct commit *head_of_epoch, emitter_func emitter)
 565{
 566        struct commit *next = head_of_epoch;
 567        int ret = 0;
 568        int action = CONTINUE;
 569
 570        ret = parse_commit(head_of_epoch);
 571
 572        while (next && next->parents && !ret && (action != STOP)) {
 573
 574                struct commit *base = NULL;
 575
 576                if ((ret = find_next_epoch_boundary(next, &base)))
 577                        return ret;
 578
 579                next->object.flags |= BOUNDARY;
 580                if (base) {
 581                        base->object.flags |= BOUNDARY;
 582                }
 583
 584                if (HAS_EXACTLY_ONE_PARENT(next)) {
 585
 586                        while (HAS_EXACTLY_ONE_PARENT(next)
 587                               && (action != STOP)
 588                               && !ret) {
 589
 590                                if (next->object.flags & UNINTERESTING) {
 591                                        action = STOP;
 592                                } else {
 593                                        action = (*emitter) (next);
 594                                }
 595
 596                                if (action != STOP) {
 597                                        next = next->parents->item;
 598                                        ret = parse_commit(next);
 599                                }
 600                        }
 601
 602                } else {
 603
 604                        struct commit_list *stack = NULL;
 605                        sort_first_epoch(next, &stack);
 606                        action = emit_stack(&stack, emitter);
 607                        next = base;
 608
 609                }
 610
 611        }
 612
 613        if (next && (action != STOP) && !ret) {
 614                (*emitter) (next);
 615        }
 616
 617        return ret;
 618}
 619
 620//
 621// Sorts the nodes reachable from a starting list in merge order, we 
 622// first find the base for the starting list and then sort all nodes in this 
 623// subgraph using the sort_first_epoch algorithm. Once we have reached the base
 624// we can continue sorting using sort_in_merge_order.
 625//
 626int sort_list_in_merge_order(struct commit_list *list, emitter_func emitter)
 627{
 628        struct commit_list *stack = NULL;
 629        struct commit *base;
 630
 631        int ret = 0;
 632        int action = CONTINUE;
 633
 634        struct commit_list *reversed = NULL;
 635
 636        for (; list; list = list->next) {
 637
 638                struct commit *next = list->item;
 639
 640                if (!(next->object.flags & UNINTERESTING)) {
 641                        if (next->object.flags & DUPCHECK) {
 642                                fprintf(stderr, "%s: duplicate commit %s ignored\n", __FUNCTION__, sha1_to_hex(next->object.sha1));
 643                        } else {
 644                                next->object.flags |= DUPCHECK;
 645                                commit_list_insert(list->item, &reversed);
 646                        }
 647                }
 648        }
 649
 650        if (!reversed->next) {
 651
 652                // if there is only one element in the list, we can sort it using 
 653                // sort_in_merge_order.
 654
 655                base = reversed->item;
 656
 657        } else {
 658
 659                // otherwise, we search for the base of the list
 660
 661                if ((ret = find_base_for_list(reversed, &base)))
 662                        return ret;
 663
 664                if (base) {
 665                        base->object.flags |= BOUNDARY;
 666                }
 667
 668                while (reversed) {
 669                        sort_first_epoch(pop_commit(&reversed), &stack);
 670                        if (reversed) {
 671                                //
 672                                // if we have more commits to push, then the
 673                                // first push for the next parent may (or may not)
 674                                // represent a discontinuity with respect to the
 675                                // parent currently on the top of the stack.
 676                                //
 677                                // mark it for checking here, and check it
 678                                // with the next push...see sort_first_epoch for
 679                                // more details.
 680                                //
 681                                stack->item->object.flags |= DISCONTINUITY;
 682                        }
 683                }
 684
 685                action = emit_stack(&stack, emitter);
 686        }
 687
 688        if (base && (action != STOP)) {
 689                ret = sort_in_merge_order(base, emitter);
 690        }
 691
 692        return ret;
 693}