epoch.c on commit "[PATCH] three --merge-order bug fixes" (4e73467)
/*
 * Copyright (c) 2005, Jon Seymour
 *
 * For more information about epoch theory on which this module is based,
 * refer to http://blackcubes.dyndns.org/epoch/. That web page defines
 * terms such as "epoch" and "minimal, non-linear epoch" and provides rationales
 * for some of the algorithms used here.
 *
 */
#include <stdlib.h>
#include <openssl/bn.h>		// provides arbitrary precision integers
				// required to accurately represent
				// fractional mass

#include "cache.h"
#include "commit.h"
#include "epoch.h"

struct fraction {
	BIGNUM numerator;
	BIGNUM denominator;
};

#define HAS_EXACTLY_ONE_PARENT(n) ((n)->parents && !(n)->parents->next)

static BN_CTX *context = NULL;
static struct fraction *one = NULL;
static struct fraction *zero = NULL;

static BN_CTX *get_BN_CTX()
{
	if (!context) {
		context = BN_CTX_new();
	}
	return context;
}

static struct fraction *new_zero()
{
	struct fraction *result = xmalloc(sizeof(*result));
	BN_init(&result->numerator);
	BN_init(&result->denominator);
	BN_zero(&result->numerator);
	BN_one(&result->denominator);
	return result;
}

static void clear_fraction(struct fraction *fraction)
{
	BN_clear(&fraction->numerator);
	BN_clear(&fraction->denominator);
}

static struct fraction *divide(struct fraction *result, struct fraction *fraction, int divisor)
{
	BIGNUM bn_divisor;

	BN_init(&bn_divisor);
	BN_set_word(&bn_divisor, divisor);

	BN_copy(&result->numerator, &fraction->numerator);
	BN_mul(&result->denominator, &fraction->denominator, &bn_divisor, get_BN_CTX());

	BN_clear(&bn_divisor);
	return result;
}

static struct fraction *init_fraction(struct fraction *fraction)
{
	BN_init(&fraction->numerator);
	BN_init(&fraction->denominator);
	BN_zero(&fraction->numerator);
	BN_one(&fraction->denominator);
	return fraction;
}

static struct fraction *get_one()
{
	if (!one) {
		one = new_zero();
		BN_one(&one->numerator);
	}
	return one;
}

static struct fraction *get_zero()
{
	if (!zero) {
		zero = new_zero();
	}
	return zero;
}

static struct fraction *copy(struct fraction *to, struct fraction *from)
{
	BN_copy(&to->numerator, &from->numerator);
	BN_copy(&to->denominator, &from->denominator);
	return to;
}

static struct fraction *add(struct fraction *result, struct fraction *left, struct fraction *right)
{
	BIGNUM a, b, gcd;

	BN_init(&a);
	BN_init(&b);
	BN_init(&gcd);

	BN_mul(&a, &left->numerator, &right->denominator, get_BN_CTX());
	BN_mul(&b, &left->denominator, &right->numerator, get_BN_CTX());
	BN_mul(&result->denominator, &left->denominator, &right->denominator, get_BN_CTX());
	BN_add(&result->numerator, &a, &b);

	BN_gcd(&gcd, &result->denominator, &result->numerator, get_BN_CTX());
	BN_div(&result->denominator, NULL, &result->denominator, &gcd, get_BN_CTX());
	BN_div(&result->numerator, NULL, &result->numerator, &gcd, get_BN_CTX());

	BN_clear(&a);
	BN_clear(&b);
	BN_clear(&gcd);

	return result;
}

static int compare(struct fraction *left, struct fraction *right)
{
	BIGNUM a, b;

	int result;

	BN_init(&a);
	BN_init(&b);

	BN_mul(&a, &left->numerator, &right->denominator, get_BN_CTX());
	BN_mul(&b, &left->denominator, &right->numerator, get_BN_CTX());

	result = BN_cmp(&a, &b);

	BN_clear(&a);
	BN_clear(&b);

	return result;
}
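
//
// Illustrative sketch added by the editor (not part of the original file and
// never called): exercises the fraction helpers above. divide() splits one
// unit into halves and thirds, add() sums 1/2 and 1/3 to 5/6 (reducing by the
// gcd), and compare() confirms the sum is still less than one whole unit.
//
static void fraction_example(void)
{
	struct fraction half, third, sum;

	divide(init_fraction(&half), get_one(), 2);	// half  = 1/2
	divide(init_fraction(&third), get_one(), 3);	// third = 1/3

	add(init_fraction(&sum), &half, &third);	// sum   = 5/6

	if (compare(&sum, get_one()) >= 0)
		die("fraction_example: expected 5/6 < 1");

	clear_fraction(&half);
	clear_fraction(&third);
	clear_fraction(&sum);
}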

struct mass_counter {
	struct fraction seen;
	struct fraction pending;
};

static struct mass_counter *new_mass_counter(struct commit *commit, struct fraction *pending)
{
	struct mass_counter *mass_counter = xmalloc(sizeof(*mass_counter));
	memset(mass_counter, 0, sizeof(*mass_counter));

	init_fraction(&mass_counter->seen);
	init_fraction(&mass_counter->pending);

	copy(&mass_counter->pending, pending);
	copy(&mass_counter->seen, get_zero());

	if (commit->object.util) {
		die("multiple attempts to initialize mass counter for %s\n", sha1_to_hex(commit->object.sha1));
	}

	commit->object.util = mass_counter;

	return mass_counter;
}

static void free_mass_counter(struct mass_counter *counter)
{
	clear_fraction(&counter->seen);
	clear_fraction(&counter->pending);
	free(counter);
}

//
// Finds the base commit of a list of commits.
//
// One property of the commit being searched for is that every commit reachable
// from the base commit is reachable from the commits in the starting list only
// via paths that include the base commit.
//
// This algorithm uses a conservation of mass approach to find the base commit.
//
// We start by injecting one unit of mass into the graph at each
// of the commits in the starting list. Injecting mass into a commit
// is achieved by adding to its pending mass counter and, if it is not already
// enqueued, enqueuing the commit in a list of pending commits, in latest
// commit date first order.
//
// The algorithm then proceeds to visit each commit in the pending queue.
// Upon each visit, the pending mass is added to the mass already seen for that
// commit and then divided into N equal portions, where N is the number of
// parents of the commit being visited. The divided portions are then injected
// into each of the parents.
//
// The algorithm continues until we discover a commit which has seen all the
// mass originally injected or until we run out of things to do.
//
// If we find a commit that has seen all the original mass, we have found
// the common base of all the commits in the starting list.
//
// The algorithm does _not_ depend on accurate timestamps for correct operation.
// However, reasonably sane (e.g. non-random) timestamps are required in order
// to prevent an exponential performance characteristic. The occasional
// timestamp inaccuracy will not dramatically affect performance but may
// result in more nodes being processed than strictly necessary.
//
// This procedure sets *boundary to the address of the base commit. It returns
// non-zero if, and only if, there was a problem parsing one of the
// commits discovered during the traversal.
//
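//
// A worked example (added by the editor; not in the original comment):
// suppose the starting list is {A, B} and both A and B have the single
// parent P, so injected = 2. Visiting A moves its pending unit to its seen
// mass and injects 1 into P; visiting B injects another unit, raising P's
// pending mass to 2. When P is visited, its seen mass reaches 2 == injected,
// so P is reported through *boundary as the base.
//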
static int find_base_for_list(struct commit_list *list, struct commit **boundary)
{

	int ret = 0;

	struct commit_list *cleaner = NULL;
	struct commit_list *pending = NULL;

	*boundary = NULL;

	struct fraction injected;

	init_fraction(&injected);

	for (; list; list = list->next) {

		struct commit *item = list->item;

		if (item->object.util) {
			die("%s:%d:%s: logic error: this should not have happened - commit %s\n",
			    __FILE__, __LINE__, __FUNCTION__, sha1_to_hex(item->object.sha1));
		}

		new_mass_counter(list->item, get_one());
		add(&injected, &injected, get_one());

		commit_list_insert(list->item, &cleaner);
		commit_list_insert(list->item, &pending);
	}

	while (!*boundary && pending && !ret) {

		struct commit *latest = pop_commit(&pending);

		struct mass_counter *latest_node = (struct mass_counter *) latest->object.util;

		if ((ret = parse_commit(latest)))
			continue;

		add(&latest_node->seen, &latest_node->seen, &latest_node->pending);

		int num_parents = count_parents(latest);

		if (num_parents) {

			struct fraction distribution;
			struct commit_list *parents;

			divide(init_fraction(&distribution), &latest_node->pending, num_parents);

			for (parents = latest->parents; parents; parents = parents->next) {

				struct commit *parent = parents->item;
				struct mass_counter *parent_node = (struct mass_counter *) parent->object.util;

				if (!parent_node) {

					parent_node = new_mass_counter(parent, &distribution);

					insert_by_date(&pending, parent);
					commit_list_insert(parent, &cleaner);

				} else {

					if (!compare(&parent_node->pending, get_zero())) {
						insert_by_date(&pending, parent);
					}
					add(&parent_node->pending, &parent_node->pending, &distribution);

				}
			}

			clear_fraction(&distribution);

		}

		if (!compare(&latest_node->seen, &injected)) {
			*boundary = latest;
		}

		copy(&latest_node->pending, get_zero());

	}

	while (cleaner) {

		struct commit *next = pop_commit(&cleaner);
		free_mass_counter((struct mass_counter *) next->object.util);
		next->object.util = NULL;

	}

	if (pending)
		free_commit_list(pending);

	clear_fraction(&injected);

	return ret;

}

//
// Finds the base of a minimal, non-linear epoch, headed at head, by
// applying find_base_for_list to a list consisting of the parents of head.
//
static int find_base(struct commit *head, struct commit **boundary)
{
	int ret = 0;
	struct commit_list *pending = NULL;
	struct commit_list *next;

	for (next = head->parents; next; next = next->next) {
		commit_list_insert(next->item, &pending);
	}
	ret = find_base_for_list(pending, boundary);
	free_commit_list(pending);

	return ret;
}

//
// This procedure traverses to the boundary of the first epoch in the epoch
// sequence of the epoch headed at head_of_epoch. This is either the end of
// the maximal linear epoch or the base of a minimal non-linear epoch.
//
// The queue of pending nodes is sorted in reverse date order and each node
// is currently in the queue at most once.
//
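//
// Illustration (added by the editor; not part of the original comment):
// if a -> b -> c and a, b each have exactly one parent while c is a root
// commit or a merge, then a heads a maximal linear epoch and the boundary
// returned is c. If instead m is a merge whose parents d and e both descend
// from a common commit f, then m heads a minimal, non-linear epoch and the
// boundary returned is f, the base located by find_base().
//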
static int find_next_epoch_boundary(struct commit *head_of_epoch, struct commit **boundary)
{
	int ret;
	struct commit *item = head_of_epoch;

	ret = parse_commit(item);
	if (ret)
		return ret;

	if (HAS_EXACTLY_ONE_PARENT(item)) {

		// we are at the start of a maximal linear epoch:
		// traverse to its end
		while (HAS_EXACTLY_ONE_PARENT(item) && !ret) {
			item = item->parents->item;
			ret = parse_commit(item);
		}
		*boundary = item;

	} else {

		// otherwise, we are at the start of a minimal, non-linear
		// epoch - find the common base of all parents.

		ret = find_base(item, boundary);

	}

	return ret;
}

//
// Returns non-zero if parent is known to be a parent of child.
//
static int is_parent_of(struct commit *parent, struct commit *child)
{
	struct commit_list *parents;
	for (parents = child->parents; parents; parents = parents->next) {
		if (!memcmp(parent->object.sha1, parents->item->object.sha1, sizeof(parents->item->object.sha1)))
			return 1;
	}
	return 0;
}

//
// Pushes an item onto the merge order stack. If the top of the stack is
// marked as being a possible "break", we check to see whether it actually
// is a break.
//
static void push_onto_merge_order_stack(struct commit_list **stack, struct commit *item)
{
	struct commit_list *top = *stack;
	if (top && (top->item->object.flags & DISCONTINUITY)) {
		if (is_parent_of(top->item, item)) {
			top->item->object.flags &= ~DISCONTINUITY;
		}
	}
	commit_list_insert(item, stack);
}
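
//
// Worked illustration (added by the editor; not in the original comments):
// sort_first_epoch marks the commit on top of the stack DISCONTINUITY when
// further parents of the same merge remain to be sorted. If the next commit
// pushed here lists that marked commit among its parents, the emitted
// sequence (which pops the stack) will place the child immediately before
// its parent, so there is no real break and the mark is cleared; otherwise
// the mark stands.
//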

//
// Marks all interesting, visited commits reachable from this commit
// as uninteresting. We stop recursing when we reach the epoch boundary,
// an unvisited node or a node that has already been marked uninteresting.
// This doesn't actually mark all ancestors between the start node and the
// epoch boundary uninteresting, but does ensure that they will
// eventually be marked uninteresting when the main sort_first_epoch
// traversal reaches them.
//
static void mark_ancestors_uninteresting(struct commit *commit)
{
	unsigned int flags = commit->object.flags;
	int visited = flags & VISITED;
	int boundary = flags & BOUNDARY;
	int uninteresting = flags & UNINTERESTING;

	struct commit_list *next;

	commit->object.flags |= UNINTERESTING;

	//
	// We only need to recurse if
	//      we are not on the boundary, and,
	//      we have not already been marked uninteresting, and,
	//      we have already been visited.
	//
	// The main sort_first_epoch traversal will mark uninteresting
	// all unvisited parents as they are visited, so there is no need
	// to duplicate that traversal here.
	//
	// Similarly, if we are already marked uninteresting then either
	// all ancestors have already been marked uninteresting or will be
	// once the sort_first_epoch traversal reaches them.
	//
	if (uninteresting || boundary || !visited) {
		return;
	}

	for (next = commit->parents; next; next = next->next)
		mark_ancestors_uninteresting(next->item);
}

//
// Sorts the nodes of the first epoch of the epoch sequence of the epoch
// headed at head into merge order.
//
static void sort_first_epoch(struct commit *head, struct commit_list **stack)
{
	struct commit_list *parents;
	struct commit_list *reversed_parents = NULL;

	head->object.flags |= VISITED;

	//
	// parse_commit builds the parent list in reverse order with respect to
	// the order of the git-commit-tree arguments.
	//
	// So we need to reverse this list to output the oldest (or most "local")
	// commits last.
	//

	for (parents = head->parents; parents; parents = parents->next)
		commit_list_insert(parents->item, &reversed_parents);

	//
	// todo: by sorting the parents in a different order, we can alter the
	// merge order to show contemporaneous changes in parallel branches
	// occurring after "local" changes. This is useful when a developer
	// wants all changes that were incorporated into the same merge as
	// her own changes to appear after her own changes.
	//

	while (reversed_parents) {

		struct commit *parent = pop_commit(&reversed_parents);

		if (head->object.flags & UNINTERESTING) {
			// Propagate the uninteresting bit to all parents.
			// If we have already visited this parent, then the
			// uninteresting bit will be propagated to each reachable
			// commit that is still not marked uninteresting and
			// won't otherwise be reached.
			mark_ancestors_uninteresting(parent);
		}

		if (!(parent->object.flags & VISITED)) {
			if (parent->object.flags & BOUNDARY) {

				if (*stack) {
					die("something else is on the stack - %s\n", sha1_to_hex((*stack)->item->object.sha1));
				}

				push_onto_merge_order_stack(stack, parent);
				parent->object.flags |= VISITED;

			} else {

				sort_first_epoch(parent, stack);

				if (reversed_parents) {
					//
					// This indicates a possible discontinuity.
					// It may not be an actual discontinuity if
					// the head of parent N happens to be the
					// tail of parent N+1.
					//
					// The next push onto the stack will resolve
					// the question.
					//
					(*stack)->item->object.flags |= DISCONTINUITY;
				}
			}
		}
	}

	push_onto_merge_order_stack(stack, head);
}

//
// Emit the contents of the stack.
//
// The stack is freed and replaced by NULL.
//
// Sets the return value to STOP if no further output should be generated.
//
static int emit_stack(struct commit_list **stack, emitter_func emitter)
{
	unsigned int seen = 0;
	int action = CONTINUE;

	while (*stack && (action != STOP)) {

		struct commit *next = pop_commit(stack);

		seen |= next->object.flags;

		if (*stack) {
			action = (*emitter) (next);
		}
	}

	if (*stack) {
		free_commit_list(*stack);
		*stack = NULL;
	}

	return (action == STOP || (seen & UNINTERESTING)) ? STOP : CONTINUE;
}

//
// Sorts an arbitrary epoch into merge order by sorting each epoch
// of its epoch sequence into order.
//
// Note: this algorithm currently leaves traces of its execution in the
// object flags of nodes it discovers. This should probably be fixed.
//
static int sort_in_merge_order(struct commit *head_of_epoch, emitter_func emitter)
{
	struct commit *next = head_of_epoch;
	int ret = 0;
	int action = CONTINUE;

	ret = parse_commit(head_of_epoch);

	while (next && next->parents && !ret && (action != STOP)) {

		struct commit *base = NULL;

		if ((ret = find_next_epoch_boundary(next, &base)))
			return ret;

		next->object.flags |= BOUNDARY;
		if (base) {
			base->object.flags |= BOUNDARY;
		}

		if (HAS_EXACTLY_ONE_PARENT(next)) {

			while (HAS_EXACTLY_ONE_PARENT(next)
			       && (action != STOP)
			       && !ret) {

				if (next->object.flags & UNINTERESTING) {
					action = STOP;
				} else {
					action = (*emitter) (next);
				}

				if (action != STOP) {
					next = next->parents->item;
					ret = parse_commit(next);
				}
			}

		} else {

			struct commit_list *stack = NULL;
			sort_first_epoch(next, &stack);
			action = emit_stack(&stack, emitter);
			next = base;

		}

	}

	if (next && (action != STOP) && !ret) {
		(*emitter) (next);
	}

	return ret;
}

//
// Sorts the nodes reachable from a starting list in merge order. We
// first find the base for the starting list and then sort all nodes in this
// subgraph using the sort_first_epoch algorithm. Once we have reached the base
// we can continue sorting using sort_in_merge_order.
//
int sort_list_in_merge_order(struct commit_list *list, emitter_func emitter)
{
	struct commit_list *stack = NULL;
	struct commit *base;

	int ret = 0;
	int action = CONTINUE;

	struct commit_list *reversed = NULL;

	for (; list; list = list->next) {

		struct commit *next = list->item;

		if (!(next->object.flags & UNINTERESTING)) {
			if (next->object.flags & DUPCHECK) {
				fprintf(stderr, "%s: duplicate commit %s ignored\n", __FUNCTION__, sha1_to_hex(next->object.sha1));
			} else {
				next->object.flags |= DUPCHECK;
				commit_list_insert(list->item, &reversed);
			}
		}
	}

	if (!reversed->next) {

		// If there is only one element in the list, we can sort it using
		// sort_in_merge_order.

		base = reversed->item;

	} else {

		// Otherwise, we search for the base of the list.

		if ((ret = find_base_for_list(reversed, &base)))
			return ret;

		if (base) {
			base->object.flags |= BOUNDARY;
		}

		while (reversed) {
			sort_first_epoch(pop_commit(&reversed), &stack);
			if (reversed) {
				//
				// If we have more commits to push, then the
				// first push for the next parent may (or may not)
				// represent a discontinuity with respect to the
				// parent currently on the top of the stack.
				//
				// Mark it for checking here, and check it
				// with the next push... see sort_first_epoch for
				// more details.
				//
				stack->item->object.flags |= DISCONTINUITY;
			}
		}

		action = emit_stack(&stack, emitter);
	}

	if (base && (action != STOP)) {
		ret = sort_in_merge_order(base, emitter);
	}

	return ret;
}
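
//
// Usage sketch added by the editor (illustrative; not part of the original
// file). It assumes epoch.h declares emitter_func as a function taking a
// struct commit * and returning CONTINUE or STOP, as the calls above suggest.
// A caller such as git-rev-list --merge-order could then drive the sort
// like this:
//
//	static int show_commit(struct commit *commit)
//	{
//		printf("%s\n", sha1_to_hex(commit->object.sha1));
//		return CONTINUE;
//	}
//
//	...
//	struct commit_list *heads = ...;	// the starting commits
//	if (sort_list_in_merge_order(heads, show_commit))
//		die("failed to parse a commit during the merge-order sort");
//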