pack-redundant.con commit Rename .git/lost+found to .git/lost-found. (0720365)
   1/*
   2*
   3* Copyright 2005, Lukas Sandstrom <lukass@etek.chalmers.se>
   4*
   5* This file is licensed under the GPL v2.
   6*
   7*/
   8
   9#include "cache.h"
  10
  11static const char pack_redundant_usage[] =
  12"git-pack-redundant [ --verbose ] [ --alt-odb ] < --all | <.pack filename> ...>";
  13
  14int load_all_packs = 0, verbose = 0, alt_odb = 0;
  15
  16struct llist_item {
  17        struct llist_item *next;
  18        char *sha1;
  19};
  20struct llist {
  21        struct llist_item *front;
  22        struct llist_item *back;
  23        size_t size;
  24} *all_objects; /* all objects which must be present in local packfiles */
  25
  26struct pack_list {
  27        struct pack_list *next;
  28        struct packed_git *pack;
  29        struct llist *unique_objects;
  30        struct llist *all_objects;
  31} *local_packs = NULL, *altodb_packs = NULL;
  32
  33struct pll {
  34        struct pll *next;
  35        struct pack_list *pl;
  36};
  37
  38inline void llist_free(struct llist *list)
  39{
  40        while((list->back = list->front)) {
  41                list->front = list->front->next;
  42                free(list->back);
  43        }
  44        free(list);
  45}
  46
  47inline void llist_init(struct llist **list)
  48{
  49        *list = xmalloc(sizeof(struct llist));
  50        (*list)->front = (*list)->back = NULL;
  51        (*list)->size = 0;
  52}
  53
  54struct llist * llist_copy(struct llist *list)
  55{
  56        struct llist *ret;
  57        struct llist_item *new, *old, *prev;
  58        
  59        llist_init(&ret);
  60
  61        if ((ret->size = list->size) == 0)
  62                return ret;
  63
  64        new = ret->front = xmalloc(sizeof(struct llist_item));
  65        new->sha1 = list->front->sha1;
  66
  67        old = list->front->next;
  68        while (old) {
  69                prev = new;
  70                new = xmalloc(sizeof(struct llist_item));
  71                prev->next = new;
  72                new->sha1 = old->sha1;
  73                old = old->next;
  74        }
  75        new->next = NULL;
  76        ret->back = new;
  77        
  78        return ret;
  79}
  80
  81inline struct llist_item * llist_insert(struct llist *list,
  82                                        struct llist_item *after, char *sha1)
  83{
  84        struct llist_item *new = xmalloc(sizeof(struct llist_item));
  85        new->sha1 = sha1;
  86        new->next = NULL;
  87
  88        if (after != NULL) {
  89                new->next = after->next;
  90                after->next = new;
  91                if (after == list->back)
  92                        list->back = new;
  93        } else {/* insert in front */
  94                if (list->size == 0)
  95                        list->back = new;
  96                else
  97                        new->next = list->front;
  98                list->front = new;
  99        }
 100        list->size++;
 101        return new;
 102}
 103
 104inline struct llist_item * llist_insert_back(struct llist *list, char *sha1)
 105{
 106        return llist_insert(list, list->back, sha1);
 107}
 108
 109inline struct llist_item * llist_insert_sorted_unique(struct llist *list,
 110                                        char *sha1, struct llist_item *hint)
 111{
 112        struct llist_item *prev = NULL, *l;
 113
 114        l = (hint == NULL) ? list->front : hint;
 115        while (l) {
 116                int cmp = memcmp(l->sha1, sha1, 20);
 117                if (cmp > 0) { /* we insert before this entry */
 118                        return llist_insert(list, prev, sha1);
 119                }
 120                if(!cmp) { /* already exists */
 121                        return l;
 122                }
 123                prev = l;
 124                l = l->next;
 125        }
 126        /* insert at the end */
 127        return llist_insert_back(list, sha1);
 128}
 129
 130/* computes A\B */
 131void llist_sorted_difference_inplace(struct llist *A,
 132                                     struct llist *B)
 133{
 134        struct llist_item *prev, *a, *b, *x;
 135
 136        prev = a = A->front;
 137        b = B->front;
 138
 139        while (a != NULL && b != NULL) {
 140                int cmp = memcmp(a->sha1, b->sha1, 20);
 141                if (!cmp) {
 142                        x = a;
 143                        if (a == A->front)
 144                                A->front = a->next;
 145                        a = prev->next = a->next;
 146
 147                        if (a == NULL) /* end of list */
 148                                A->back = prev;
 149                        A->size--;
 150                        free(x);
 151                        b = b->next;
 152                } else
 153                        if (cmp > 0)
 154                                b = b->next;
 155                        else {
 156                                prev = a;
 157                                a = a->next;
 158                        }
 159        }
 160}
 161
 162/* returns a pointer to an item in front of sha1 */
 163inline struct llist_item * llist_sorted_remove(struct llist *list, char *sha1,
 164                                               struct llist_item *hint)
 165{
 166        struct llist_item *prev, *l;
 167
 168redo_from_start:
 169        l = (hint == NULL) ? list->front : hint;
 170        prev = NULL;
 171        while (l) {
 172                int cmp = memcmp(l->sha1, sha1, 20);
 173                if (cmp > 0) /* not in list, since sorted */
 174                        return prev;
 175                if(!cmp) { /* found */
 176                        if (prev == NULL) {
 177                                if (hint != NULL && hint != list->front) {
 178                                        /* we don't know the previous element */
 179                                        hint = NULL;
 180                                        goto redo_from_start;
 181                                }
 182                                list->front = l->next;
 183                        } else
 184                                prev->next = l->next;
 185                        if (l == list->back)
 186                                list->back = prev;
 187                        free(l);
 188                        list->size--;
 189                        return prev;
 190                }
 191                prev = l;
 192                l = l->next;
 193        }
 194        return prev;
 195}
 196
 197inline struct pack_list * pack_list_insert(struct pack_list **pl,
 198                                           struct pack_list *entry)
 199{
 200        struct pack_list *p = xmalloc(sizeof(struct pack_list));
 201        memcpy(p, entry, sizeof(struct pack_list));
 202        p->next = *pl;
 203        *pl = p;
 204        return p;
 205}
 206
 207inline size_t pack_list_size(struct pack_list *pl)
 208{
 209        size_t ret = 0;
 210        while(pl) {
 211                ret++;
 212                pl = pl->next;
 213        }
 214        return ret;
 215}
 216
 217struct pack_list * pack_list_difference(struct pack_list *A,
 218                                        struct pack_list *B)
 219{
 220        struct pack_list *ret, *pl;
 221
 222        if (A == NULL)
 223                return NULL;
 224
 225        pl = B;
 226        while (pl != NULL) {
 227                if (A->pack == pl->pack)
 228                        return pack_list_difference(A->next, B);
 229                pl = pl->next;
 230        }
 231        ret = xmalloc(sizeof(struct pack_list));
 232        memcpy(ret, A, sizeof(struct pack_list));
 233        ret->next = pack_list_difference(A->next, B);
 234        return ret;
 235}
 236
 237void cmp_two_packs(struct pack_list *p1, struct pack_list *p2)
 238{
 239        int p1_off, p2_off;
 240        void *p1_base, *p2_base;
 241        struct llist_item *p1_hint = NULL, *p2_hint = NULL;
 242        
 243        p1_off = p2_off = 256 * 4 + 4;
 244        p1_base = (void *)p1->pack->index_base;
 245        p2_base = (void *)p2->pack->index_base;
 246
 247        while (p1_off <= p1->pack->index_size - 3 * 20 &&
 248               p2_off <= p2->pack->index_size - 3 * 20)
 249        {
 250                int cmp = memcmp(p1_base + p1_off, p2_base + p2_off, 20);
 251                /* cmp ~ p1 - p2 */
 252                if (cmp == 0) {
 253                        p1_hint = llist_sorted_remove(p1->unique_objects,
 254                                        p1_base + p1_off, p1_hint);
 255                        p2_hint = llist_sorted_remove(p2->unique_objects,
 256                                        p1_base + p1_off, p2_hint);
 257                        p1_off+=24;
 258                        p2_off+=24;
 259                        continue;
 260                }
 261                if (cmp < 0) { /* p1 has the object, p2 doesn't */
 262                        p1_off+=24;
 263                } else { /* p2 has the object, p1 doesn't */
 264                        p2_off+=24;
 265                }
 266        }
 267}
 268
 269/* all the permutations have to be free()d at the same time,
 270 * since they refer to each other
 271 */
 272struct pll * get_all_permutations(struct pack_list *list)
 273{
 274        struct pll *subset, *pll, *new_pll = NULL; /*silence warning*/
 275
 276        if (list == NULL)
 277                return NULL;
 278
 279        if (list->next == NULL) {
 280                new_pll = xmalloc(sizeof(struct pll));
 281                new_pll->next = NULL;
 282                new_pll->pl = list;
 283                return new_pll;
 284        }
 285
 286        pll = subset = get_all_permutations(list->next);
 287        while (pll) {
 288                new_pll = xmalloc(sizeof(struct pll));
 289                new_pll->next = pll->next;
 290                pll->next = new_pll;
 291
 292                new_pll->pl = xmalloc(sizeof(struct pack_list));
 293                memcpy(new_pll->pl, list, sizeof(struct pack_list));
 294                new_pll->pl->next = pll->pl;
 295
 296                pll = new_pll->next;
 297        }
 298        /* add ourself to the end */
 299        new_pll->next = xmalloc(sizeof(struct pll));
 300        new_pll->next->pl = xmalloc(sizeof(struct pack_list));
 301        new_pll->next->next = NULL;
 302        memcpy(new_pll->next->pl, list, sizeof(struct pack_list));
 303        new_pll->next->pl->next = NULL;
 304
 305        return subset;
 306}
 307
 308int is_superset(struct pack_list *pl, struct llist *list)
 309{
 310        struct llist *diff;
 311
 312        diff = llist_copy(list);
 313
 314        while (pl) {
 315                llist_sorted_difference_inplace(diff,
 316                                                pl->all_objects);
 317                if (diff->size == 0) { /* we're done */
 318                        llist_free(diff);
 319                        return 1;
 320                }
 321                pl = pl->next;
 322        }
 323        llist_free(diff);
 324        return 0;
 325}
 326
 327size_t sizeof_union(struct packed_git *p1, struct packed_git *p2)
 328{
 329        size_t ret = 0;
 330        int p1_off, p2_off;
 331        void *p1_base, *p2_base;
 332
 333        p1_off = p2_off = 256 * 4 + 4;
 334        p1_base = (void *)p1->index_base;
 335        p2_base = (void *)p2->index_base;
 336
 337        while (p1_off <= p1->index_size - 3 * 20 &&
 338               p2_off <= p2->index_size - 3 * 20)
 339        {
 340                int cmp = memcmp(p1_base + p1_off, p2_base + p2_off, 20);
 341                /* cmp ~ p1 - p2 */
 342                if (cmp == 0) {
 343                        ret++;
 344                        p1_off+=24;
 345                        p2_off+=24;
 346                        continue;
 347                }
 348                if (cmp < 0) { /* p1 has the object, p2 doesn't */
 349                        p1_off+=24;
 350                } else { /* p2 has the object, p1 doesn't */
 351                        p2_off+=24;
 352                }
 353        }
 354        return ret;
 355}
 356
 357/* another O(n^2) function ... */
 358size_t get_pack_redundancy(struct pack_list *pl)
 359{
 360        struct pack_list *subset;
 361
 362        if (pl == NULL)
 363                return 0;
 364
 365        size_t ret = 0;
 366        while ((subset = pl->next)) {
 367                while(subset) {
 368                        ret += sizeof_union(pl->pack, subset->pack);
 369                        subset = subset->next;
 370                }
 371                pl = pl->next;
 372        }
 373        return ret;
 374}
 375
 376inline size_t pack_set_bytecount(struct pack_list *pl)
 377{
 378        size_t ret = 0;
 379        while (pl) {
 380                ret += pl->pack->pack_size;
 381                ret += pl->pack->index_size;
 382                pl = pl->next;
 383        }
 384        return ret;
 385}
 386
 387void minimize(struct pack_list **min)
 388{
 389        struct pack_list *pl, *unique = NULL,
 390                *non_unique = NULL, *min_perm = NULL;
 391        struct pll *perm, *perm_all, *perm_ok = NULL, *new_perm;
 392        struct llist *missing;
 393        size_t min_perm_size = (size_t)-1, perm_size;
 394
 395        pl = local_packs;
 396        while (pl) {
 397                if(pl->unique_objects->size)
 398                        pack_list_insert(&unique, pl);
 399                else
 400                        pack_list_insert(&non_unique, pl);
 401                pl = pl->next;
 402        }
 403        /* find out which objects are missing from the set of unique packs */
 404        missing = llist_copy(all_objects);
 405        pl = unique;
 406        while (pl) {
 407                llist_sorted_difference_inplace(missing,
 408                                                pl->all_objects);
 409                pl = pl->next;
 410        }
 411
 412        /* return if there are no objects missing from the unique set */
 413        if (missing->size == 0) {
 414                *min = unique;
 415                return;
 416        }
 417
 418        /* find the permutations which contain all missing objects */
 419        perm_all = perm = get_all_permutations(non_unique);
 420        while (perm) {
 421                if (is_superset(perm->pl, missing)) {
 422                        new_perm = xmalloc(sizeof(struct pll));
 423                        new_perm->pl = perm->pl;
 424                        new_perm->next = perm_ok;
 425                        perm_ok = new_perm;
 426                }
 427                perm = perm->next;
 428        }
 429        
 430        if (perm_ok == NULL)
 431                die("Internal error: No complete sets found!\n");
 432
 433        /* find the permutation with the smallest size */
 434        perm = perm_ok;
 435        while (perm) {
 436                perm_size = pack_set_bytecount(perm->pl);
 437                if (min_perm_size > perm_size) {
 438                        min_perm_size = perm_size;
 439                        min_perm = perm->pl;
 440                }
 441                perm = perm->next;
 442        }
 443        *min = min_perm;
 444        /* add the unique packs to the list */
 445        pl = unique;
 446        while(pl) {
 447                pack_list_insert(min, pl);
 448                pl = pl->next;
 449        }
 450}
 451
 452void load_all_objects()
 453{
 454        struct pack_list *pl = local_packs;
 455        struct llist_item *hint, *l;
 456        int i;
 457
 458        llist_init(&all_objects);
 459
 460        while (pl) {
 461                i = 0;
 462                hint = NULL;
 463                l = pl->all_objects->front;
 464                while (l) {
 465                        hint = llist_insert_sorted_unique(all_objects,
 466                                                          l->sha1, hint);
 467                        l = l->next;
 468                }
 469                pl = pl->next;
 470        }
 471        /* remove objects present in remote packs */
 472        pl = altodb_packs;
 473        while (pl) {
 474                llist_sorted_difference_inplace(all_objects, pl->all_objects);
 475                pl = pl->next;
 476        }
 477}
 478
 479/* this scales like O(n^2) */
 480void cmp_packs()
 481{
 482        struct pack_list *subset, *pl = local_packs;
 483
 484        while ((subset = pl)) {
 485                while((subset = subset->next))
 486                        cmp_two_packs(pl, subset);
 487                pl = pl->next;
 488        }
 489
 490        pl = altodb_packs;
 491        while (pl) {
 492                subset = local_packs;
 493                while (subset) {
 494                        llist_sorted_difference_inplace(subset->unique_objects,
 495                                                        pl->all_objects);
 496                        subset = subset->next;
 497                }
 498                pl = pl->next;
 499        }
 500}
 501
 502struct pack_list * add_pack(struct packed_git *p)
 503{
 504        struct pack_list l;
 505        size_t off;
 506        void *base;
 507
 508        l.pack = p;
 509        llist_init(&l.all_objects);
 510
 511        off = 256 * 4 + 4;
 512        base = (void *)p->index_base;
 513        while (off <= p->index_size - 3 * 20) {
 514                llist_insert_back(l.all_objects, base + off);
 515                off+=24;
 516        }
 517        /* this list will be pruned in cmp_two_packs later */
 518        l.unique_objects = llist_copy(l.all_objects);
 519        if (p->pack_local)
 520                return pack_list_insert(&local_packs, &l);
 521        else
 522                return alt_odb ? pack_list_insert(&altodb_packs, &l) : NULL;
 523}
 524
 525struct pack_list * add_pack_file(char *filename)
 526{
 527        struct packed_git *p = packed_git;
 528
 529        if (strlen(filename) < 40)
 530                die("Bad pack filename: %s\n", filename);
 531
 532        while (p) {
 533                if (strstr(p->pack_name, filename))
 534                        return add_pack(p);
 535                p = p->next;
 536        }
 537        die("Filename %s not found in packed_git\n", filename);
 538}
 539
 540void load_all()
 541{
 542        struct packed_git *p = packed_git;
 543
 544        while (p) {
 545                add_pack(p);
 546                p = p->next;
 547        }
 548}
 549
 550int main(int argc, char **argv)
 551{
 552        int i;
 553        struct pack_list *min, *red, *pl;
 554
 555        for (i = 1; i < argc; i++) {
 556                const char *arg = argv[i];
 557                if(!strcmp(arg, "--")) {
 558                        i++;
 559                        break;
 560                }
 561                if(!strcmp(arg, "--all")) {
 562                        load_all_packs = 1;
 563                        continue;
 564                }
 565                if(!strcmp(arg, "--verbose")) {
 566                        verbose = 1;
 567                        continue;
 568                }
 569                if(!strcmp(arg, "--alt-odb")) {
 570                        alt_odb = 1;
 571                        continue;
 572                }
 573                if(*arg == '-')
 574                        usage(pack_redundant_usage);
 575                else
 576                        break;
 577        }
 578
 579        prepare_packed_git();
 580
 581        if (load_all_packs)
 582                load_all();
 583        else
 584                while (*(argv + i) != NULL)
 585                        add_pack_file(*(argv + i++));
 586
 587        if (local_packs == NULL)
 588                die("Zero packs found!\n");
 589
 590        cmp_packs();
 591
 592        load_all_objects();
 593
 594        minimize(&min);
 595        if (verbose) {
 596                fprintf(stderr, "There are %ld packs available in alt-odbs.\n",
 597                        pack_list_size(altodb_packs));
 598                fprintf(stderr, "The smallest (bytewise) set of packs is:\n");
 599                pl = min;
 600                while (pl) {
 601                        fprintf(stderr, "\t%s\n", pl->pack->pack_name);
 602                        pl = pl->next;
 603                }
 604                fprintf(stderr, "containing %ld duplicate objects "
 605                                "with a total size of %ldkb.\n",
 606                        get_pack_redundancy(min), pack_set_bytecount(min)/1024);
 607                fprintf(stderr, "A total of %ld unique objects were considered.\n",
 608                        all_objects->size);
 609                fprintf(stderr, "Redundant packs (with indexes):\n");
 610        }
 611        pl = red = pack_list_difference(local_packs, min);
 612        while (pl) {
 613                printf("%s\n%s\n",
 614                       sha1_pack_index_name(pl->pack->sha1),
 615                       pl->pack->pack_name);
 616                pl = pl->next;
 617        }
 618
 619        return 0;
 620}