/*
 * Copyright (c) 2005, Jon Seymour
 *
 * For more information about epoch theory on which this module is based,
 * refer to http://blackcubes.dyndns.org/epoch/. That web page defines
 * terms such as "epoch" and "minimal, non-linear epoch" and provides
 * rationales for some of the algorithms used here.
 *
 */
#include <stdlib.h>

// Provides arbitrary precision integers, required to accurately
// represent fractional mass.
#include <openssl/bn.h>

#include "cache.h"
#include "commit.h"
#include "epoch.h"

struct fraction {
	BIGNUM numerator;
	BIGNUM denominator;
};

#define HAS_EXACTLY_ONE_PARENT(n) ((n)->parents && !(n)->parents->next)

static BN_CTX *context = NULL;
static struct fraction *one = NULL;
static struct fraction *zero = NULL;

static BN_CTX *get_BN_CTX(void)
{
	if (!context) {
		context = BN_CTX_new();
	}
	return context;
}

static struct fraction *new_zero(void)
{
	struct fraction *result = xmalloc(sizeof(*result));
	BN_init(&result->numerator);
	BN_init(&result->denominator);
	BN_zero(&result->numerator);
	BN_one(&result->denominator);
	return result;
}

static void clear_fraction(struct fraction *fraction)
{
	BN_clear(&fraction->numerator);
	BN_clear(&fraction->denominator);
}

static struct fraction *divide(struct fraction *result, struct fraction *fraction, int divisor)
{
	BIGNUM bn_divisor;

	BN_init(&bn_divisor);
	BN_set_word(&bn_divisor, divisor);

	BN_copy(&result->numerator, &fraction->numerator);
	BN_mul(&result->denominator, &fraction->denominator, &bn_divisor, get_BN_CTX());

	BN_clear(&bn_divisor);
	return result;
}

static struct fraction *init_fraction(struct fraction *fraction)
{
	BN_init(&fraction->numerator);
	BN_init(&fraction->denominator);
	BN_zero(&fraction->numerator);
	BN_one(&fraction->denominator);
	return fraction;
}

static struct fraction *get_one(void)
{
	if (!one) {
		one = new_zero();
		BN_one(&one->numerator);
	}
	return one;
}

static struct fraction *get_zero(void)
{
	if (!zero) {
		zero = new_zero();
	}
	return zero;
}

static struct fraction *copy(struct fraction *to, struct fraction *from)
{
	BN_copy(&to->numerator, &from->numerator);
	BN_copy(&to->denominator, &from->denominator);
	return to;
}

static struct fraction *add(struct fraction *result, struct fraction *left, struct fraction *right)
{
	BIGNUM a, b, gcd;

	BN_init(&a);
	BN_init(&b);
	BN_init(&gcd);

	BN_mul(&a, &left->numerator, &right->denominator, get_BN_CTX());
	BN_mul(&b, &left->denominator, &right->numerator, get_BN_CTX());
	BN_mul(&result->denominator, &left->denominator, &right->denominator, get_BN_CTX());
	BN_add(&result->numerator, &a, &b);

	BN_gcd(&gcd, &result->denominator, &result->numerator, get_BN_CTX());
	BN_div(&result->denominator, NULL, &result->denominator, &gcd, get_BN_CTX());
	BN_div(&result->numerator, NULL, &result->numerator, &gcd, get_BN_CTX());

	BN_clear(&a);
	BN_clear(&b);
	BN_clear(&gcd);

	return result;
}

static int compare(struct fraction *left, struct fraction *right)
{
	BIGNUM a, b;
	int result;

	BN_init(&a);
	BN_init(&b);

	BN_mul(&a, &left->numerator, &right->denominator, get_BN_CTX());
	BN_mul(&b, &left->denominator, &right->numerator, get_BN_CTX());

	result = BN_cmp(&a, &b);

	BN_clear(&a);
	BN_clear(&b);

	return result;
}
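//
// Worked example for the fraction helpers above (illustrative values,
// not from the original source): add(r, 1/2, 1/6) computes a = 1*6 and
// b = 2*1, so r = 8/12 before reduction; BN_gcd finds gcd(12, 8) = 4
// and the two BN_div calls reduce r to 2/3. compare(1/2, 1/3)
// cross-multiplies and returns BN_cmp(1*3, 2*1) > 0, i.e. 1/2 > 1/3,
// with no rounding at any point.
//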
struct mass_counter {
	struct fraction seen;
	struct fraction pending;
};

static struct mass_counter *new_mass_counter(struct commit *commit, struct fraction *pending)
{
	struct mass_counter *mass_counter = xmalloc(sizeof(*mass_counter));
	memset(mass_counter, 0, sizeof(*mass_counter));

	init_fraction(&mass_counter->seen);
	init_fraction(&mass_counter->pending);

	copy(&mass_counter->pending, pending);
	copy(&mass_counter->seen, get_zero());

	if (commit->object.util) {
		die("multiple attempts to initialize mass counter for %s\n",
		    sha1_to_hex(commit->object.sha1));
	}

	commit->object.util = mass_counter;

	return mass_counter;
}

static void free_mass_counter(struct mass_counter *counter)
{
	clear_fraction(&counter->seen);
	clear_fraction(&counter->pending);
	free(counter);
}

//
// Finds the base commit of a list of commits.
//
// One property of the commit being searched for is that every commit reachable
// from the base commit is reachable from the commits in the starting list only
// via paths that include the base commit.
//
// This algorithm uses a conservation of mass approach to find the base commit.
//
// We start by injecting one unit of mass into the graph at each of the
// commits in the starting list. Injecting mass into a commit is achieved
// by adding to its pending mass counter and, if it is not already enqueued,
// enqueuing the commit in a list of pending commits, in latest commit date
// first order.
//
// The algorithm then proceeds to visit each commit in the pending queue.
// Upon each visit, the pending mass is added to the mass already seen for
// that commit and then divided into N equal portions, where N is the number
// of parents of the commit being visited. The divided portions are then
// injected into each of the parents.
//
// The algorithm continues until we discover a commit which has seen all the
// mass originally injected or until we run out of things to do.
//
// If we find a commit that has seen all the original mass, we have found
// the common base of all the commits in the starting list.
//
// The algorithm does _not_ depend on accurate timestamps for correct
// operation. However, reasonably sane (e.g. non-random) timestamps are
// required in order to prevent an exponential performance characteristic.
// The occasional timestamp inaccuracy will not dramatically affect
// performance but may result in more nodes being processed than strictly
// necessary.
//
// This procedure sets *boundary to the address of the base commit. It
// returns non-zero if, and only if, there was a problem parsing one of
// the commits discovered during the traversal.
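//
// By way of a worked example (hypothetical commits, not from the
// original description): let the starting list be {A, B}, where A and
// B share the single parent C. One unit is injected at A and at B, so
// the total injected mass is 2. Visiting A and B forwards one unit
// each to C's pending counter; when C is visited, its seen mass
// reaches 2, equal to the injected mass, and C is reported as the
// base. Had A instead been a merge with two parents, each parent would
// have received exactly 1/2, which is why exact fractions rather than
// floating point are used.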
//
static int find_base_for_list(struct commit_list *list, struct commit **boundary)
{
	int ret = 0;
	struct commit_list *cleaner = NULL;
	struct commit_list *pending = NULL;
	struct fraction injected;

	*boundary = NULL;
	init_fraction(&injected);

	for (; list; list = list->next) {
		struct commit *item = list->item;

		if (item->object.util) {
			die("%s:%d:%s: logic error: this should not have happened - commit %s\n",
			    __FILE__, __LINE__, __FUNCTION__,
			    sha1_to_hex(item->object.sha1));
		}

		new_mass_counter(list->item, get_one());
		add(&injected, &injected, get_one());

		commit_list_insert(list->item, &cleaner);
		commit_list_insert(list->item, &pending);
	}

	while (!*boundary && pending && !ret) {
		struct commit *latest = pop_commit(&pending);
		struct mass_counter *latest_node = (struct mass_counter *) latest->object.util;
		int num_parents;

		if ((ret = parse_commit(latest)))
			continue;

		add(&latest_node->seen, &latest_node->seen, &latest_node->pending);

		num_parents = count_parents(latest);
		if (num_parents) {
			struct fraction distribution;
			struct commit_list *parents;

			divide(init_fraction(&distribution), &latest_node->pending, num_parents);

			for (parents = latest->parents; parents; parents = parents->next) {
				struct commit *parent = parents->item;
				struct mass_counter *parent_node = (struct mass_counter *) parent->object.util;

				if (!parent_node) {
					parent_node = new_mass_counter(parent, &distribution);
					insert_by_date(&pending, parent);
					commit_list_insert(parent, &cleaner);
				} else {
					if (!compare(&parent_node->pending, get_zero())) {
						insert_by_date(&pending, parent);
					}
					add(&parent_node->pending, &parent_node->pending, &distribution);
				}
			}

			clear_fraction(&distribution);
		}

		if (!compare(&latest_node->seen, &injected)) {
			*boundary = latest;
		}
		copy(&latest_node->pending, get_zero());
	}

	while (cleaner) {
		struct commit *next = pop_commit(&cleaner);
		free_mass_counter((struct mass_counter *) next->object.util);
		next->object.util = NULL;
	}

	if (pending)
		free_commit_list(pending);

	clear_fraction(&injected);

	return ret;
}

//
// Finds the base of a minimal, non-linear epoch, headed at head, by
// applying find_base_for_list to a list consisting of the parents.
//
static int find_base(struct commit *head, struct commit **boundary)
{
	int ret = 0;
	struct commit_list *pending = NULL;
	struct commit_list *next;

	for (next = head->parents; next; next = next->next) {
		commit_list_insert(next->item, &pending);
	}
	ret = find_base_for_list(pending, boundary);
	free_commit_list(pending);

	return ret;
}
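//
// Illustration of the terminology (per the epoch pages cited in the
// file header, using hypothetical commits): in the graph below, M is a
// merge whose parents P1 and P2 both reach B, so the minimal,
// non-linear epoch headed at M has base B, while the single-parent
// chain B -> A2 -> A1 is a maximal linear epoch.
//
//	M ---> P1 ---> B ---> A2 ---> A1
//	 \---> P2 ---/
//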
//
// This procedure traverses to the boundary of the first epoch in the epoch
// sequence of the epoch headed at head_of_epoch. This is either the end of
// the maximal linear epoch or the base of a minimal non-linear epoch.
//
// The queue of pending nodes is sorted in reverse date order and each node
// is currently in the queue at most once.
//
static int find_next_epoch_boundary(struct commit *head_of_epoch, struct commit **boundary)
{
	int ret;
	struct commit *item = head_of_epoch;

	ret = parse_commit(item);
	if (ret)
		return ret;

	if (HAS_EXACTLY_ONE_PARENT(item)) {
		// We are at the start of a maximal linear epoch:
		// traverse to its end.
		while (HAS_EXACTLY_ONE_PARENT(item) && !ret) {
			item = item->parents->item;
			ret = parse_commit(item);
		}
		*boundary = item;

	} else {
		// Otherwise, we are at the start of a minimal, non-linear
		// epoch - find the common base of all parents.
		ret = find_base(item, boundary);
	}

	return ret;
}

//
// Returns non-zero if parent is known to be a parent of child.
//
static int is_parent_of(struct commit *parent, struct commit *child)
{
	struct commit_list *parents;
	for (parents = child->parents; parents; parents = parents->next) {
		if (!memcmp(parent->object.sha1, parents->item->object.sha1,
			    sizeof(parents->item->object.sha1)))
			return 1;
	}
	return 0;
}

//
// Pushes an item onto the merge order stack. If the top of the stack is
// marked as being a possible "break", we check to see whether it actually
// is a break.
//
static void push_onto_merge_order_stack(struct commit_list **stack, struct commit *item)
{
	struct commit_list *top = *stack;
	if (top && (top->item->object.flags & DISCONTINUITY)) {
		if (is_parent_of(top->item, item)) {
			top->item->object.flags &= ~DISCONTINUITY;
		}
	}
	commit_list_insert(item, stack);
}
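//
// To illustrate the DISCONTINUITY handshake (a restatement of the
// mechanism above, not additional behaviour): after sorting one
// parent's sub-graph, sort_first_epoch marks the top of the stack
// DISCONTINUITY while further parents remain. If the next commit
// pushed has the marked top as one of its parents, the two runs are
// contiguous and the mark is cleared; otherwise the mark survives as a
// genuine break in the merge order.
//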
//
// Marks all interesting, visited commits reachable from this commit
// as uninteresting. We stop recursing when we reach the epoch boundary,
// an unvisited node or a node that has already been marked uninteresting.
// This doesn't actually mark all ancestors between the start node and the
// epoch boundary uninteresting, but does ensure that they will eventually
// be marked uninteresting when the main sort_first_epoch traversal
// eventually reaches them.
//
static void mark_ancestors_uninteresting(struct commit *commit)
{
	unsigned int flags = commit->object.flags;
	int visited = flags & VISITED;
	int boundary = flags & BOUNDARY;
	int uninteresting = flags & UNINTERESTING;
	struct commit_list *next;

	commit->object.flags |= UNINTERESTING;

	//
	// We only need to recurse if
	//      we are not on the boundary, and
	//      we have not already been marked uninteresting, and
	//      we have already been visited.
	//
	// The main sort_first_epoch traversal will mark unreachable
	// all uninteresting, unvisited parents as they are visited,
	// so there is no need to duplicate that traversal here.
	//
	// Similarly, if we are already marked uninteresting, then
	// either all ancestors have already been marked uninteresting
	// or will be once the sort_first_epoch traversal reaches them.
	//
	if (uninteresting || boundary || !visited)
		return;

	for (next = commit->parents; next; next = next->next)
		mark_ancestors_uninteresting(next->item);
}

//
// Sorts the nodes of the first epoch of the epoch sequence of the
// epoch headed at head into merge order.
//
static void sort_first_epoch(struct commit *head, struct commit_list **stack)
{
	struct commit_list *parents;
	struct commit_list *reversed_parents = NULL;

	head->object.flags |= VISITED;

	//
	// parse_commit builds the parent list in reverse order with respect
	// to the order of the git-commit-tree arguments, so we need to
	// reverse this list to output the oldest (or most "local") commits
	// last.
	//
	for (parents = head->parents; parents; parents = parents->next)
		commit_list_insert(parents->item, &reversed_parents);

	//
	// TODO: by sorting the parents in a different order, we can alter
	// the merge order so that contemporaneous changes in parallel
	// branches appear after "local" changes. This is useful when a
	// developer wants all changes that were incorporated into the same
	// merge as her own changes to appear after her own changes.
	//
	while (reversed_parents) {
		struct commit *parent = pop_commit(&reversed_parents);

		if (head->object.flags & UNINTERESTING) {
			// Propagates the uninteresting bit to all parents.
			// If we have already visited this parent, then the
			// uninteresting bit will be propagated to each
			// reachable commit that is still not marked
			// uninteresting and won't otherwise be reached.
			mark_ancestors_uninteresting(parent);
		}

		if (!(parent->object.flags & VISITED)) {
			if (parent->object.flags & BOUNDARY) {
				if (*stack) {
					die("something else is on the stack - %s\n",
					    sha1_to_hex((*stack)->item->object.sha1));
				}

				push_onto_merge_order_stack(stack, parent);
				parent->object.flags |= VISITED;

			} else {
				sort_first_epoch(parent, stack);

				if (reversed_parents) {
					//
					// This indicates a possible
					// discontinuity. It may not be an
					// actual discontinuity if the head
					// of parent N happens to be the
					// tail of parent N+1.
					//
					// The next push onto the stack
					// will resolve the question.
					//
					(*stack)->item->object.flags |= DISCONTINUITY;
				}
			}
		}
	}

	push_onto_merge_order_stack(stack, head);
}

//
// Emits the contents of the stack.
//
// The stack is freed and replaced by NULL.
//
// Sets the return value to STOP if no further output should be generated.
//
static int emit_stack(struct commit_list **stack, emitter_func emitter)
{
	unsigned int seen = 0;
	int action = CONTINUE;

	while (*stack && (action != STOP)) {
		struct commit *next = pop_commit(stack);
		seen |= next->object.flags;
		if (*stack) {
			action = (*emitter) (next);
		}
	}

	if (*stack) {
		free_commit_list(*stack);
		*stack = NULL;
	}

	return (action == STOP || (seen & UNINTERESTING)) ? STOP : CONTINUE;
}
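//
// A minimal sketch of an emitter (an assumed example, not part of this
// file) as consumed by emit_stack and sort_in_merge_order below: print
// each commit's hash and keep going. A real caller would do its own
// formatting and could return STOP to truncate the output early.
//
//	static int show_commit_example(struct commit *commit)
//	{
//		printf("%s\n", sha1_to_hex(commit->object.sha1));
//		return CONTINUE;
//	}
//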
//
// Sorts an arbitrary epoch into merge order by sorting each epoch
// of its epoch sequence into order.
//
// Note: this algorithm currently leaves traces of its execution in the
// object flags of nodes it discovers. This should probably be fixed.
//
static int sort_in_merge_order(struct commit *head_of_epoch, emitter_func emitter)
{
	struct commit *next = head_of_epoch;
	int ret = 0;
	int action = CONTINUE;

	ret = parse_commit(head_of_epoch);

	while (next && next->parents && !ret && (action != STOP)) {
		struct commit *base = NULL;

		if ((ret = find_next_epoch_boundary(next, &base)))
			return ret;

		next->object.flags |= BOUNDARY;
		if (base) {
			base->object.flags |= BOUNDARY;
		}

		if (HAS_EXACTLY_ONE_PARENT(next)) {
			while (HAS_EXACTLY_ONE_PARENT(next)
			       && (action != STOP)
			       && !ret) {
				if (next->object.flags & UNINTERESTING) {
					action = STOP;
				} else {
					action = (*emitter) (next);
				}

				if (action != STOP) {
					next = next->parents->item;
					ret = parse_commit(next);
				}
			}

		} else {
			struct commit_list *stack = NULL;
			sort_first_epoch(next, &stack);
			action = emit_stack(&stack, emitter);
			next = base;
		}
	}

	if (next && (action != STOP) && !ret) {
		(*emitter) (next);
	}

	return ret;
}

//
// Sorts the nodes reachable from a starting list in merge order. We
// first find the base for the starting list, then sort all nodes in
// this subgraph using the sort_first_epoch algorithm. Once we have
// reached the base, we can continue sorting using sort_in_merge_order.
//
int sort_list_in_merge_order(struct commit_list *list, emitter_func emitter)
{
	struct commit_list *stack = NULL;
	struct commit *base;
	struct commit_list *reversed = NULL;
	int ret = 0;
	int action = CONTINUE;

	for (; list; list = list->next) {
		struct commit *next = list->item;

		if (!(next->object.flags & UNINTERESTING)) {
			if (next->object.flags & DUPCHECK) {
				fprintf(stderr, "%s: duplicate commit %s ignored\n",
					__FUNCTION__, sha1_to_hex(next->object.sha1));
			} else {
				next->object.flags |= DUPCHECK;
				commit_list_insert(list->item, &reversed);
			}
		}
	}

	// An empty list (for example, one where every commit was
	// uninteresting or a duplicate) has no base to find.
	if (!reversed)
		return ret;

	if (!reversed->next) {
		// If there is only one element in the list, we can sort it
		// using sort_in_merge_order.
		base = reversed->item;

	} else {
		// Otherwise, we search for the base of the list.
		if ((ret = find_base_for_list(reversed, &base)))
			return ret;

		if (base) {
			base->object.flags |= BOUNDARY;
		}

		while (reversed) {
			sort_first_epoch(pop_commit(&reversed), &stack);
			if (reversed) {
				//
				// If we have more commits to push, then the
				// first push for the next parent may (or may
				// not) represent a discontinuity with respect
				// to the parent currently on the top of the
				// stack.
				//
				// Mark it for checking here, and check it
				// with the next push... see sort_first_epoch
				// for more details.
				//
				stack->item->object.flags |= DISCONTINUITY;
			}
		}

		action = emit_stack(&stack, emitter);
	}

	if (base && (action != STOP)) {
		ret = sort_in_merge_order(base, emitter);
	}

	return ret;
}
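//
// Usage sketch (an assumed caller, not part of this file; head_commit
// stands in for whatever struct commit * the caller starts from):
// collect the interesting heads into a commit_list and hand them to
// sort_list_in_merge_order together with an emitter such as the
// show_commit_example sketched earlier:
//
//	struct commit_list *list = NULL;
//	commit_list_insert(head_commit, &list);
//	int ret = sort_list_in_merge_order(list, &show_commit_example);
//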