pack-redundant.con commit GIT 0.99.9j aka 1.0rc3 (27dedf0)
   1/*
   2*
   3* Copyright 2005, Lukas Sandstrom <lukass@etek.chalmers.se>
   4*
   5* This file is licensed under the GPL v2.
   6*
   7*/
   8
   9#include "cache.h"
  10
  11static const char pack_redundant_usage[] =
  12"git-pack-redundant [ --verbose ] [ --alt-odb ] < --all | <.pack filename> ...>";
  13
  14int load_all_packs = 0, verbose = 0, alt_odb = 0;
  15
  16struct llist_item {
  17        struct llist_item *next;
  18        char *sha1;
  19};
  20struct llist {
  21        struct llist_item *front;
  22        struct llist_item *back;
  23        size_t size;
  24} *all_objects; /* all objects which must be present in local packfiles */
  25
  26struct pack_list {
  27        struct pack_list *next;
  28        struct packed_git *pack;
  29        struct llist *unique_objects;
  30        struct llist *all_objects;
  31} *local_packs = NULL, *altodb_packs = NULL;
  32
  33struct pll {
  34        struct pll *next;
  35        struct pack_list *pl;
  36};
  37
  38inline void llist_free(struct llist *list)
  39{
  40        while((list->back = list->front)) {
  41                list->front = list->front->next;
  42                free(list->back);
  43        }
  44        free(list);
  45}
  46
  47inline void llist_init(struct llist **list)
  48{
  49        *list = xmalloc(sizeof(struct llist));
  50        (*list)->front = (*list)->back = NULL;
  51        (*list)->size = 0;
  52}
  53
  54struct llist * llist_copy(struct llist *list)
  55{
  56        struct llist *ret;
  57        struct llist_item *new, *old, *prev;
  58        
  59        llist_init(&ret);
  60
  61        if ((ret->size = list->size) == 0)
  62                return ret;
  63
  64        new = ret->front = xmalloc(sizeof(struct llist_item));
  65        new->sha1 = list->front->sha1;
  66
  67        old = list->front->next;
  68        while (old) {
  69                prev = new;
  70                new = xmalloc(sizeof(struct llist_item));
  71                prev->next = new;
  72                new->sha1 = old->sha1;
  73                old = old->next;
  74        }
  75        new->next = NULL;
  76        ret->back = new;
  77        
  78        return ret;
  79}
  80
  81inline struct llist_item * llist_insert(struct llist *list,
  82                                        struct llist_item *after, char *sha1)
  83{
  84        struct llist_item *new = xmalloc(sizeof(struct llist_item));
  85        new->sha1 = sha1;
  86        new->next = NULL;
  87
  88        if (after != NULL) {
  89                new->next = after->next;
  90                after->next = new;
  91                if (after == list->back)
  92                        list->back = new;
  93        } else {/* insert in front */
  94                if (list->size == 0)
  95                        list->back = new;
  96                else
  97                        new->next = list->front;
  98                list->front = new;
  99        }
 100        list->size++;
 101        return new;
 102}
 103
 104inline struct llist_item * llist_insert_back(struct llist *list, char *sha1)
 105{
 106        return llist_insert(list, list->back, sha1);
 107}
 108
 109inline struct llist_item * llist_insert_sorted_unique(struct llist *list,
 110                                        char *sha1, struct llist_item *hint)
 111{
 112        struct llist_item *prev = NULL, *l;
 113
 114        l = (hint == NULL) ? list->front : hint;
 115        while (l) {
 116                int cmp = memcmp(l->sha1, sha1, 20);
 117                if (cmp > 0) { /* we insert before this entry */
 118                        return llist_insert(list, prev, sha1);
 119                }
 120                if(!cmp) { /* already exists */
 121                        return l;
 122                }
 123                prev = l;
 124                l = l->next;
 125        }
 126        /* insert at the end */
 127        return llist_insert_back(list, sha1);
 128}
 129
 130/* returns a pointer to an item in front of sha1 */
 131inline struct llist_item * llist_sorted_remove(struct llist *list, char *sha1,
 132                                               struct llist_item *hint)
 133{
 134        struct llist_item *prev, *l;
 135
 136redo_from_start:
 137        l = (hint == NULL) ? list->front : hint;
 138        prev = NULL;
 139        while (l) {
 140                int cmp = memcmp(l->sha1, sha1, 20);
 141                if (cmp > 0) /* not in list, since sorted */
 142                        return prev;
 143                if(!cmp) { /* found */
 144                        if (prev == NULL) {
 145                                if (hint != NULL && hint != list->front) {
 146                                        /* we don't know the previous element */
 147                                        hint = NULL;
 148                                        goto redo_from_start;
 149                                }
 150                                list->front = l->next;
 151                        } else
 152                                prev->next = l->next;
 153                        if (l == list->back)
 154                                list->back = prev;
 155                        free(l);
 156                        list->size--;
 157                        return prev;
 158                }
 159                prev = l;
 160                l = l->next;
 161        }
 162        return prev;
 163}
 164
 165/* computes A\B */
 166void llist_sorted_difference_inplace(struct llist *A,
 167                                     struct llist *B)
 168{
 169        struct llist_item *hint, *b;
 170
 171        hint = NULL;
 172        b = B->front;
 173
 174        while (b) {
 175                hint = llist_sorted_remove(A, b->sha1, hint);
 176                b = b->next;
 177        }
 178}
 179
 180inline struct pack_list * pack_list_insert(struct pack_list **pl,
 181                                           struct pack_list *entry)
 182{
 183        struct pack_list *p = xmalloc(sizeof(struct pack_list));
 184        memcpy(p, entry, sizeof(struct pack_list));
 185        p->next = *pl;
 186        *pl = p;
 187        return p;
 188}
 189
 190inline size_t pack_list_size(struct pack_list *pl)
 191{
 192        size_t ret = 0;
 193        while(pl) {
 194                ret++;
 195                pl = pl->next;
 196        }
 197        return ret;
 198}
 199
 200struct pack_list * pack_list_difference(struct pack_list *A,
 201                                        struct pack_list *B)
 202{
 203        struct pack_list *ret, *pl;
 204
 205        if (A == NULL)
 206                return NULL;
 207
 208        pl = B;
 209        while (pl != NULL) {
 210                if (A->pack == pl->pack)
 211                        return pack_list_difference(A->next, B);
 212                pl = pl->next;
 213        }
 214        ret = xmalloc(sizeof(struct pack_list));
 215        memcpy(ret, A, sizeof(struct pack_list));
 216        ret->next = pack_list_difference(A->next, B);
 217        return ret;
 218}
 219
 220void cmp_two_packs(struct pack_list *p1, struct pack_list *p2)
 221{
 222        int p1_off, p2_off;
 223        void *p1_base, *p2_base;
 224        struct llist_item *p1_hint = NULL, *p2_hint = NULL;
 225        
 226        p1_off = p2_off = 256 * 4 + 4;
 227        p1_base = (void *)p1->pack->index_base;
 228        p2_base = (void *)p2->pack->index_base;
 229
 230        while (p1_off <= p1->pack->index_size - 3 * 20 &&
 231               p2_off <= p2->pack->index_size - 3 * 20)
 232        {
 233                int cmp = memcmp(p1_base + p1_off, p2_base + p2_off, 20);
 234                /* cmp ~ p1 - p2 */
 235                if (cmp == 0) {
 236                        p1_hint = llist_sorted_remove(p1->unique_objects,
 237                                        p1_base + p1_off, p1_hint);
 238                        p2_hint = llist_sorted_remove(p2->unique_objects,
 239                                        p1_base + p1_off, p2_hint);
 240                        p1_off+=24;
 241                        p2_off+=24;
 242                        continue;
 243                }
 244                if (cmp < 0) { /* p1 has the object, p2 doesn't */
 245                        p1_off+=24;
 246                } else { /* p2 has the object, p1 doesn't */
 247                        p2_off+=24;
 248                }
 249        }
 250}
 251
 252/* all the permutations have to be free()d at the same time,
 253 * since they refer to each other
 254 */
 255struct pll * get_all_permutations(struct pack_list *list)
 256{
 257        struct pll *subset, *pll, *new_pll = NULL; /*silence warning*/
 258
 259        if (list == NULL)
 260                return NULL;
 261
 262        if (list->next == NULL) {
 263                new_pll = xmalloc(sizeof(struct pll));
 264                new_pll->next = NULL;
 265                new_pll->pl = list;
 266                return new_pll;
 267        }
 268
 269        pll = subset = get_all_permutations(list->next);
 270        while (pll) {
 271                new_pll = xmalloc(sizeof(struct pll));
 272                new_pll->next = pll->next;
 273                pll->next = new_pll;
 274
 275                new_pll->pl = xmalloc(sizeof(struct pack_list));
 276                memcpy(new_pll->pl, list, sizeof(struct pack_list));
 277                new_pll->pl->next = pll->pl;
 278
 279                pll = new_pll->next;
 280        }
 281        /* add ourself to the end */
 282        new_pll->next = xmalloc(sizeof(struct pll));
 283        new_pll->next->pl = xmalloc(sizeof(struct pack_list));
 284        new_pll->next->next = NULL;
 285        memcpy(new_pll->next->pl, list, sizeof(struct pack_list));
 286        new_pll->next->pl->next = NULL;
 287
 288        return subset;
 289}
 290
 291int is_superset(struct pack_list *pl, struct llist *list)
 292{
 293        struct llist *diff;
 294
 295        diff = llist_copy(list);
 296
 297        while (pl) {
 298                llist_sorted_difference_inplace(diff,
 299                                                pl->all_objects);
 300                if (diff->size == 0) { /* we're done */
 301                        llist_free(diff);
 302                        return 1;
 303                }
 304                pl = pl->next;
 305        }
 306        llist_free(diff);
 307        return 0;
 308}
 309
 310size_t sizeof_union(struct packed_git *p1, struct packed_git *p2)
 311{
 312        size_t ret = 0;
 313        int p1_off, p2_off;
 314        void *p1_base, *p2_base;
 315
 316        p1_off = p2_off = 256 * 4 + 4;
 317        p1_base = (void *)p1->index_base;
 318        p2_base = (void *)p2->index_base;
 319
 320        while (p1_off <= p1->index_size - 3 * 20 &&
 321               p2_off <= p2->index_size - 3 * 20)
 322        {
 323                int cmp = memcmp(p1_base + p1_off, p2_base + p2_off, 20);
 324                /* cmp ~ p1 - p2 */
 325                if (cmp == 0) {
 326                        ret++;
 327                        p1_off+=24;
 328                        p2_off+=24;
 329                        continue;
 330                }
 331                if (cmp < 0) { /* p1 has the object, p2 doesn't */
 332                        p1_off+=24;
 333                } else { /* p2 has the object, p1 doesn't */
 334                        p2_off+=24;
 335                }
 336        }
 337        return ret;
 338}
 339
 340/* another O(n^2) function ... */
 341size_t get_pack_redundancy(struct pack_list *pl)
 342{
 343        struct pack_list *subset;
 344
 345        if (pl == NULL)
 346                return 0;
 347
 348        size_t ret = 0;
 349        while ((subset = pl->next)) {
 350                while(subset) {
 351                        ret += sizeof_union(pl->pack, subset->pack);
 352                        subset = subset->next;
 353                }
 354                pl = pl->next;
 355        }
 356        return ret;
 357}
 358
 359inline size_t pack_set_bytecount(struct pack_list *pl)
 360{
 361        size_t ret = 0;
 362        while (pl) {
 363                ret += pl->pack->pack_size;
 364                ret += pl->pack->index_size;
 365                pl = pl->next;
 366        }
 367        return ret;
 368}
 369
 370void minimize(struct pack_list **min)
 371{
 372        struct pack_list *pl, *unique = NULL,
 373                *non_unique = NULL, *min_perm = NULL;
 374        struct pll *perm, *perm_all, *perm_ok = NULL, *new_perm;
 375        struct llist *missing;
 376        size_t min_perm_size = (size_t)-1, perm_size;
 377
 378        pl = local_packs;
 379        while (pl) {
 380                if(pl->unique_objects->size)
 381                        pack_list_insert(&unique, pl);
 382                else
 383                        pack_list_insert(&non_unique, pl);
 384                pl = pl->next;
 385        }
 386        /* find out which objects are missing from the set of unique packs */
 387        missing = llist_copy(all_objects);
 388        pl = unique;
 389        while (pl) {
 390                llist_sorted_difference_inplace(missing,
 391                                                pl->all_objects);
 392                pl = pl->next;
 393        }
 394
 395        /* return if there are no objects missing from the unique set */
 396        if (missing->size == 0) {
 397                *min = unique;
 398                return;
 399        }
 400
 401        /* find the permutations which contain all missing objects */
 402        perm_all = perm = get_all_permutations(non_unique);
 403        while (perm) {
 404                if (is_superset(perm->pl, missing)) {
 405                        new_perm = xmalloc(sizeof(struct pll));
 406                        new_perm->pl = perm->pl;
 407                        new_perm->next = perm_ok;
 408                        perm_ok = new_perm;
 409                }
 410                perm = perm->next;
 411        }
 412        
 413        if (perm_ok == NULL)
 414                die("Internal error: No complete sets found!\n");
 415
 416        /* find the permutation with the smallest size */
 417        perm = perm_ok;
 418        while (perm) {
 419                perm_size = pack_set_bytecount(perm->pl);
 420                if (min_perm_size > perm_size) {
 421                        min_perm_size = perm_size;
 422                        min_perm = perm->pl;
 423                }
 424                perm = perm->next;
 425        }
 426        *min = min_perm;
 427        /* add the unique packs to the list */
 428        pl = unique;
 429        while(pl) {
 430                pack_list_insert(min, pl);
 431                pl = pl->next;
 432        }
 433}
 434
 435void load_all_objects()
 436{
 437        struct pack_list *pl = local_packs;
 438        struct llist_item *hint, *l;
 439        int i;
 440
 441        llist_init(&all_objects);
 442
 443        while (pl) {
 444                i = 0;
 445                hint = NULL;
 446                l = pl->all_objects->front;
 447                while (l) {
 448                        hint = llist_insert_sorted_unique(all_objects,
 449                                                          l->sha1, hint);
 450                        l = l->next;
 451                }
 452                pl = pl->next;
 453        }
 454        /* remove objects present in remote packs */
 455        pl = altodb_packs;
 456        while (pl) {
 457                llist_sorted_difference_inplace(all_objects, pl->all_objects);
 458                pl = pl->next;
 459        }
 460}
 461
 462/* this scales like O(n^2) */
 463void cmp_packs()
 464{
 465        struct pack_list *subset, *pl = local_packs;
 466
 467        while ((subset = pl)) {
 468                while((subset = subset->next))
 469                        cmp_two_packs(pl, subset);
 470                pl = pl->next;
 471        }
 472
 473        pl = altodb_packs;
 474        while (pl) {
 475                subset = local_packs;
 476                while (subset) {
 477                        llist_sorted_difference_inplace(subset->unique_objects,
 478                                                        pl->all_objects);
 479                        subset = subset->next;
 480                }
 481                pl = pl->next;
 482        }
 483}
 484
 485struct pack_list * add_pack(struct packed_git *p)
 486{
 487        struct pack_list l;
 488        size_t off;
 489        void *base;
 490
 491        l.pack = p;
 492        llist_init(&l.all_objects);
 493
 494        off = 256 * 4 + 4;
 495        base = (void *)p->index_base;
 496        while (off <= p->index_size - 3 * 20) {
 497                llist_insert_back(l.all_objects, base + off);
 498                off+=24;
 499        }
 500        /* this list will be pruned in cmp_two_packs later */
 501        l.unique_objects = llist_copy(l.all_objects);
 502        if (p->pack_local)
 503                return pack_list_insert(&local_packs, &l);
 504        else
 505                return alt_odb ? pack_list_insert(&altodb_packs, &l) : NULL;
 506}
 507
 508struct pack_list * add_pack_file(char *filename)
 509{
 510        struct packed_git *p = packed_git;
 511
 512        if (strlen(filename) < 40)
 513                die("Bad pack filename: %s\n", filename);
 514
 515        while (p) {
 516                if (strstr(p->pack_name, filename))
 517                        return add_pack(p);
 518                p = p->next;
 519        }
 520        die("Filename %s not found in packed_git\n", filename);
 521}
 522
 523void load_all()
 524{
 525        struct packed_git *p = packed_git;
 526
 527        while (p) {
 528                add_pack(p);
 529                p = p->next;
 530        }
 531}
 532
 533int main(int argc, char **argv)
 534{
 535        int i;
 536        struct pack_list *min, *red, *pl;
 537
 538        for (i = 1; i < argc; i++) {
 539                const char *arg = argv[i];
 540                if(!strcmp(arg, "--")) {
 541                        i++;
 542                        break;
 543                }
 544                if(!strcmp(arg, "--all")) {
 545                        load_all_packs = 1;
 546                        continue;
 547                }
 548                if(!strcmp(arg, "--verbose")) {
 549                        verbose = 1;
 550                        continue;
 551                }
 552                if(!strcmp(arg, "--alt-odb")) {
 553                        alt_odb = 1;
 554                        continue;
 555                }
 556                if(*arg == '-')
 557                        usage(pack_redundant_usage);
 558                else
 559                        break;
 560        }
 561
 562        prepare_packed_git();
 563
 564        if (load_all_packs)
 565                load_all();
 566        else
 567                while (*(argv + i) != NULL)
 568                        add_pack_file(*(argv + i++));
 569
 570        if (local_packs == NULL)
 571                die("Zero packs found!\n");
 572
 573        cmp_packs();
 574
 575        load_all_objects();
 576
 577        minimize(&min);
 578        if (verbose) {
 579                fprintf(stderr, "There are %lu packs available in alt-odbs.\n",
 580                        (unsigned long)pack_list_size(altodb_packs));
 581                fprintf(stderr, "The smallest (bytewise) set of packs is:\n");
 582                pl = min;
 583                while (pl) {
 584                        fprintf(stderr, "\t%s\n", pl->pack->pack_name);
 585                        pl = pl->next;
 586                }
 587                fprintf(stderr, "containing %lu duplicate objects "
 588                                "with a total size of %lukb.\n",
 589                        (unsigned long)get_pack_redundancy(min),
 590                        (unsigned long)pack_set_bytecount(min)/1024);
 591                fprintf(stderr, "A total of %lu unique objects were considered.\n",
 592                        (unsigned long)all_objects->size);
 593                fprintf(stderr, "Redundant packs (with indexes):\n");
 594        }
 595        pl = red = pack_list_difference(local_packs, min);
 596        while (pl) {
 597                printf("%s\n%s\n",
 598                       sha1_pack_index_name(pl->pack->sha1),
 599                       pl->pack->pack_name);
 600                pl = pl->next;
 601        }
 602
 603        return 0;
 604}