fsck-objects.c on commit Make git-rerere a builtin (658f365)
   1#include "cache.h"
   2#include "commit.h"
   3#include "tree.h"
   4#include "blob.h"
   5#include "tag.h"
   6#include "refs.h"
   7#include "pack.h"
   8#include "cache-tree.h"
   9#include "tree-walk.h"
  10
  /* Bits kept in object->flags while the check runs. */
  #define REACHABLE 0x0001        /* handed to mark_reachable() for refs, heads and index entries */
  #define SEEN      0x0002        /* fsck_sha1() has already examined the object */

  /* Switches set from the command line in main(). */
  static int show_root;           /* --root */
  static int show_tags;           /* --tags */
  static int show_unreachable;    /* --unreachable */
  static int check_full;          /* --full */
  static int check_strict;        /* --strict */
  static int keep_cache_objects;  /* --cache */

  /* Scratch buffer main() uses to resolve each head named on the command line. */
  static unsigned char head_sha1[20];

  /*
   * When struct dirent carries d_ino, loose objects are sorted by that
   * value before being checked; platforms that define
   * NO_D_INO_IN_DIRENT get a constant hint and no sorting.
   */
  #ifndef NO_D_INO_IN_DIRENT
  #define SORT_DIRENT 1
  #define DIRENT_SORT_HINT(de) ((de)->d_ino)
  #else
  #define SORT_DIRENT 0
  #define DIRENT_SORT_HINT(de) 0
  #endif
  29
  30static void objreport(struct object *obj, const char *severity,
  31                      const char *err, va_list params)
  32{
  33        fprintf(stderr, "%s in %s %s: ",
  34                severity, typename(obj->type), sha1_to_hex(obj->sha1));
  35        vfprintf(stderr, err, params);
  36        fputs("\n", stderr);
  37}
  38
  /*
   * Report an error about `obj` (printf-style `err` plus arguments)
   * through objreport() and return -1, so callers can write
   * "return objerror(...)".
   */
  static int objerror(struct object *obj, const char *err, ...)
  {
          va_list args;

          va_start(args, err);
          objreport(obj, "error", err, args);
          va_end(args);

          return -1;
  }
  47
  /*
   * Report a warning about `obj` (printf-style `err` plus arguments)
   * through objreport().  Like objerror() this returns -1.
   */
  static int objwarning(struct object *obj, const char *err, ...)
  {
          va_list args;

          va_start(args, err);
          objreport(obj, "warning", err, args);
          va_end(args);

          return -1;
  }
  56
  57
  58static void check_connectivity(void)
  59{
  60        int i, max;
  61
  62        /* Look up all the requirements, warn about missing objects.. */
  63        max = get_max_object_index();
  64        for (i = 0; i < max; i++) {
  65                const struct object_refs *refs;
  66                struct object *obj = get_indexed_object(i);
  67
  68                if (!obj)
  69                        continue;
  70
  71                if (!obj->parsed) {
  72                        if (has_sha1_file(obj->sha1))
  73                                ; /* it is in pack */
  74                        else
  75                                printf("missing %s %s\n",
  76                                       typename(obj->type), sha1_to_hex(obj->sha1));
  77                        continue;
  78                }
  79
  80                refs = lookup_object_refs(obj);
  81                if (refs) {
  82                        unsigned j;
  83                        for (j = 0; j < refs->count; j++) {
  84                                struct object *ref = refs->ref[j];
  85                                if (ref->parsed ||
  86                                    (has_sha1_file(ref->sha1)))
  87                                        continue;
  88                                printf("broken link from %7s %s\n",
  89                                       typename(obj->type), sha1_to_hex(obj->sha1));
  90                                printf("              to %7s %s\n",
  91                                       typename(ref->type), sha1_to_hex(ref->sha1));
  92                        }
  93                }
  94
  95                if (show_unreachable && !(obj->flags & REACHABLE)) {
  96                        printf("unreachable %s %s\n",
  97                               typename(obj->type), sha1_to_hex(obj->sha1));
  98                        continue;
  99                }
 100
 101                if (!obj->used) {
 102                        printf("dangling %s %s\n", typename(obj->type),
 103                               sha1_to_hex(obj->sha1));
 104                }
 105        }
 106}
 107
 /*
  * The entries in a tree are ordered in the _path_ order,
  * which means that a directory entry is ordered by adding
  * a slash to the end of it.
  *
  * So a directory called "a" is ordered _after_ a file
  * called "a.c", because "a/" sorts after "a.c".
  */
 #define TREE_UNORDERED (-1)
 #define TREE_HAS_DUPS  (-2)

 /*
  * Check that entry 1 (mode1, name1) sorts strictly before entry 2
  * (mode2, name2) in tree order.
  *
  * Returns 0 when the pair is correctly ordered, TREE_UNORDERED when
  * it is not, and TREE_HAS_DUPS when both names are identical
  * (whatever their modes).
  */
 static int verify_ordered(unsigned mode1, const char *name1, unsigned mode2, const char *name2)
 {
         /* size_t keeps the strlen() results intact instead of narrowing them to int. */
         size_t len1 = strlen(name1);
         size_t len2 = strlen(name2);
         size_t len = len1 < len2 ? len1 : len2;
         unsigned char c1, c2;
         int cmp;

         cmp = memcmp(name1, name2, len);
         if (cmp < 0)
                 return 0;
         if (cmp > 0)
                 return TREE_UNORDERED;

         /*
          * Ok, the first <len> characters are the same.
          * Now we need to order the next one, but turn
          * a '\0' into a '/' for a directory entry.
          */
         c1 = name1[len];
         c2 = name2[len];
         if (!c1 && !c2)
                 /*
                  * git-write-tree used to write out a nonsense tree that has
                  * entries with the same name, one blob and one tree.  Make
                  * sure we do not have duplicate entries.
                  */
                 return TREE_HAS_DUPS;
         if (!c1 && S_ISDIR(mode1))
                 c1 = '/';
         if (!c2 && S_ISDIR(mode2))
                 c2 = '/';
         return c1 < c2 ? 0 : TREE_UNORDERED;
 }
 153
 154static int fsck_tree(struct tree *item)
 155{
 156        int retval;
 157        int has_full_path = 0;
 158        int has_zero_pad = 0;
 159        int has_bad_modes = 0;
 160        int has_dup_entries = 0;
 161        int not_properly_sorted = 0;
 162        struct tree_desc desc;
 163        unsigned o_mode;
 164        const char *o_name;
 165        const unsigned char *o_sha1;
 166
 167        desc.buf = item->buffer;
 168        desc.size = item->size;
 169
 170        o_mode = 0;
 171        o_name = NULL;
 172        o_sha1 = NULL;
 173        while (desc.size) {
 174                unsigned mode;
 175                const char *name;
 176                const unsigned char *sha1;
 177
 178                sha1 = tree_entry_extract(&desc, &name, &mode);
 179
 180                if (strchr(name, '/'))
 181                        has_full_path = 1;
 182                has_zero_pad |= *(char *)desc.buf == '0';
 183                update_tree_entry(&desc);
 184
 185                switch (mode) {
 186                /*
 187                 * Standard modes..
 188                 */
 189                case S_IFREG | 0755:
 190                case S_IFREG | 0644:
 191                case S_IFLNK:
 192                case S_IFDIR:
 193                        break;
 194                /*
 195                 * This is nonstandard, but we had a few of these
 196                 * early on when we honored the full set of mode
 197                 * bits..
 198                 */
 199                case S_IFREG | 0664:
 200                        if (!check_strict)
 201                                break;
 202                default:
 203                        has_bad_modes = 1;
 204                }
 205
 206                if (o_name) {
 207                        switch (verify_ordered(o_mode, o_name, mode, name)) {
 208                        case TREE_UNORDERED:
 209                                not_properly_sorted = 1;
 210                                break;
 211                        case TREE_HAS_DUPS:
 212                                has_dup_entries = 1;
 213                                break;
 214                        default:
 215                                break;
 216                        }
 217                }
 218
 219                o_mode = mode;
 220                o_name = name;
 221                o_sha1 = sha1;
 222        }
 223        free(item->buffer);
 224        item->buffer = NULL;
 225
 226        retval = 0;
 227        if (has_full_path) {
 228                objwarning(&item->object, "contains full pathnames");
 229        }
 230        if (has_zero_pad) {
 231                objwarning(&item->object, "contains zero-padded file modes");
 232        }
 233        if (has_bad_modes) {
 234                objwarning(&item->object, "contains bad file modes");
 235        }
 236        if (has_dup_entries) {
 237                retval = objerror(&item->object, "contains duplicate file entries");
 238        }
 239        if (not_properly_sorted) {
 240                retval = objerror(&item->object, "not properly sorted");
 241        }
 242        return retval;
 243}
 244
 245static int fsck_commit(struct commit *commit)
 246{
 247        char *buffer = commit->buffer;
 248        unsigned char tree_sha1[20], sha1[20];
 249
 250        if (memcmp(buffer, "tree ", 5))
 251                return objerror(&commit->object, "invalid format - expected 'tree' line");
 252        if (get_sha1_hex(buffer+5, tree_sha1) || buffer[45] != '\n')
 253                return objerror(&commit->object, "invalid 'tree' line format - bad sha1");
 254        buffer += 46;
 255        while (!memcmp(buffer, "parent ", 7)) {
 256                if (get_sha1_hex(buffer+7, sha1) || buffer[47] != '\n')
 257                        return objerror(&commit->object, "invalid 'parent' line format - bad sha1");
 258                buffer += 48;
 259        }
 260        if (memcmp(buffer, "author ", 7))
 261                return objerror(&commit->object, "invalid format - expected 'author' line");
 262        free(commit->buffer);
 263        commit->buffer = NULL;
 264        if (!commit->tree)
 265                return objerror(&commit->object, "could not load commit's tree %s", tree_sha1);
 266        if (!commit->parents && show_root)
 267                printf("root %s\n", sha1_to_hex(commit->object.sha1));
 268        if (!commit->date)
 269                printf("bad commit date in %s\n", 
 270                       sha1_to_hex(commit->object.sha1));
 271        return 0;
 272}
 273
 274static int fsck_tag(struct tag *tag)
 275{
 276        struct object *tagged = tag->tagged;
 277
 278        if (!tagged) {
 279                return objerror(&tag->object, "could not load tagged object");
 280        }
 281        if (!show_tags)
 282                return 0;
 283
 284        printf("tagged %s %s", typename(tagged->type), sha1_to_hex(tagged->sha1));
 285        printf(" (%s) in %s\n", tag->tag, sha1_to_hex(tag->object.sha1));
 286        return 0;
 287}
 288
 289static int fsck_sha1(unsigned char *sha1)
 290{
 291        struct object *obj = parse_object(sha1);
 292        if (!obj)
 293                return error("%s: object not found", sha1_to_hex(sha1));
 294        if (obj->flags & SEEN)
 295                return 0;
 296        obj->flags |= SEEN;
 297        if (obj->type == OBJ_BLOB)
 298                return 0;
 299        if (obj->type == OBJ_TREE)
 300                return fsck_tree((struct tree *) obj);
 301        if (obj->type == OBJ_COMMIT)
 302                return fsck_commit((struct commit *) obj);
 303        if (obj->type == OBJ_TAG)
 304                return fsck_tag((struct tag *) obj);
 305        /* By now, parse_object() would've returned NULL instead. */
 306        return objerror(obj, "unknown type '%d' (internal fsck error)", obj->type);
 307}
 308
 /*
  * This is the sorting chunk size: make it reasonably
  * big so that we can sort well..
  */
 #define MAX_SHA1_ENTRIES (1024)

 /* One loose object waiting to be checked, with the value it is sorted by. */
 struct sha1_entry {
         unsigned long ino;
         unsigned char sha1[20];
 };

 /* Batch of pending entries; `nr` slots of `entry` are in use. */
 static struct {
         unsigned long nr;
         struct sha1_entry *entry[MAX_SHA1_ENTRIES];
 } sha1_list;

 /*
  * qsort() comparator for sha1_list.entry, ordering by ascending `ino`.
  *
  * The array being sorted holds pointers, so qsort() hands us pointers
  * to those pointers; they must be dereferenced once to reach the
  * entries.  Reading ->ino from the slot itself would compare the
  * pointer values instead of the inode numbers.
  */
 static int ino_compare(const void *_a, const void *_b)
 {
         const struct sha1_entry *a = *(const struct sha1_entry *const *)_a;
         const struct sha1_entry *b = *(const struct sha1_entry *const *)_b;
         unsigned long ino1 = a->ino, ino2 = b->ino;

         return ino1 < ino2 ? -1 : ino1 > ino2 ? 1 : 0;
 }
 331
 332static void fsck_sha1_list(void)
 333{
 334        int i, nr = sha1_list.nr;
 335
 336        if (SORT_DIRENT)
 337                qsort(sha1_list.entry, nr,
 338                      sizeof(struct sha1_entry *), ino_compare);
 339        for (i = 0; i < nr; i++) {
 340                struct sha1_entry *entry = sha1_list.entry[i];
 341                unsigned char *sha1 = entry->sha1;
 342
 343                sha1_list.entry[i] = NULL;
 344                fsck_sha1(sha1);
 345                free(entry);
 346        }
 347        sha1_list.nr = 0;
 348}
 349
 350static void add_sha1_list(unsigned char *sha1, unsigned long ino)
 351{
 352        struct sha1_entry *entry = xmalloc(sizeof(*entry));
 353        int nr;
 354
 355        entry->ino = ino;
 356        hashcpy(entry->sha1, sha1);
 357        nr = sha1_list.nr;
 358        if (nr == MAX_SHA1_ENTRIES) {
 359                fsck_sha1_list();
 360                nr = 0;
 361        }
 362        sha1_list.entry[nr] = entry;
 363        sha1_list.nr = ++nr;
 364}
 365
 /*
  * Scan one fan-out directory `path` whose two-hex-digit name has the
  * value `i`.  Every 38-character file name is joined with that prefix
  * into a 40-digit hex sha1 and queued with add_sha1_list(); "." and
  * ".." are skipped; anything else is reported on stderr as a bad
  * sha1 file.  A directory that cannot be opened is silently ignored.
  */
 static void fsck_dir(int i, char *path)
 {
         DIR *dir = opendir(path);
         struct dirent *de;

         if (!dir)
                 return;

         while ((de = readdir(dir)) != NULL) {
                 char name[100];
                 unsigned char sha1[20];
                 int len = strlen(de->d_name);

                 if (!strcmp(de->d_name, ".") || !strcmp(de->d_name, ".."))
                         continue;

                 if (len == 38) {
                         /* two prefix digits + 38 name characters + NUL */
                         sprintf(name, "%02x", i);
                         memcpy(name+2, de->d_name, len+1);
                         if (get_sha1_hex(name, sha1) >= 0) {
                                 add_sha1_list(sha1, DIRENT_SORT_HINT(de));
                                 continue;
                         }
                 }
                 fprintf(stderr, "bad sha1 file: %s/%s\n", path, de->d_name);
         }
         closedir(dir);
 }
 399
 400static int default_refs;
 401
 402static int fsck_handle_ref(const char *refname, const unsigned char *sha1, int flag, void *cb_data)
 403{
 404        struct object *obj;
 405
 406        obj = lookup_object(sha1);
 407        if (!obj) {
 408                if (has_sha1_file(sha1)) {
 409                        default_refs++;
 410                        return 0; /* it is in a pack */
 411                }
 412                error("%s: invalid sha1 pointer %s", refname, sha1_to_hex(sha1));
 413                /* We'll continue with the rest despite the error.. */
 414                return 0;
 415        }
 416        default_refs++;
 417        obj->used = 1;
 418        mark_reachable(obj, REACHABLE);
 419        return 0;
 420}
 421
 422static void get_default_heads(void)
 423{
 424        for_each_ref(fsck_handle_ref, NULL);
 425
 426        /*
 427         * Not having any default heads isn't really fatal, but
 428         * it does mean that "--unreachable" no longer makes any
 429         * sense (since in this case everything will obviously
 430         * be unreachable by definition.
 431         *
 432         * Showing dangling objects is valid, though (as those
 433         * dangling objects are likely lost heads).
 434         *
 435         * So we just print a warning about it, and clear the
 436         * "show_unreachable" flag.
 437         */
 438        if (!default_refs) {
 439                error("No default references");
 440                show_unreachable = 0;
 441        }
 442}
 443
 /*
  * Check all loose objects below the object directory `path`: visit
  * the 256 fan-out subdirectories "00" .. "ff" with fsck_dir(), then
  * flush whatever is still queued through fsck_sha1_list().
  *
  * A `path` too long to fit the name buffer is reported with error()
  * and its subdirectories are skipped, rather than writing past the
  * end of the buffer.
  */
 static void fsck_object_dir(const char *path)
 {
         int i;
         for (i = 0; i < 256; i++) {
                 static char dir[4096];
                 int n = snprintf(dir, sizeof(dir), "%s/%02x", path, i);

                 if (n < 0 || (size_t)n >= sizeof(dir)) {
                         /* Every subdirectory name has the same length, so stop here. */
                         error("object directory path too long: %s", path);
                         break;
                 }
                 fsck_dir(i, dir);
         }
         fsck_sha1_list();
 }
 454
 455static int fsck_head_link(void)
 456{
 457        unsigned char sha1[20];
 458        int flag;
 459        const char *head_points_at = resolve_ref("HEAD", sha1, 1, &flag);
 460
 461        if (!head_points_at || !(flag & REF_ISSYMREF))
 462                return error("HEAD is not a symbolic ref");
 463        if (strncmp(head_points_at, "refs/heads/", 11))
 464                return error("HEAD points to something strange (%s)",
 465                             head_points_at);
 466        if (is_null_sha1(sha1))
 467                return error("HEAD: not a valid git pointer");
 468        return 0;
 469}
 470
 471static int fsck_cache_tree(struct cache_tree *it)
 472{
 473        int i;
 474        int err = 0;
 475
 476        if (0 <= it->entry_count) {
 477                struct object *obj = parse_object(it->sha1);
 478                if (!obj) {
 479                        error("%s: invalid sha1 pointer in cache-tree",
 480                              sha1_to_hex(it->sha1));
 481                        return 1;
 482                }
 483                mark_reachable(obj, REACHABLE);
 484                obj->used = 1;
 485                if (obj->type != OBJ_TREE)
 486                        err |= objerror(obj, "non-tree in cache-tree");
 487        }
 488        for (i = 0; i < it->subtree_nr; i++)
 489                err |= fsck_cache_tree(it->down[i]->cache_tree);
 490        return err;
 491}
 492
 493int main(int argc, char **argv)
 494{
 495        int i, heads;
 496
 497        track_object_refs = 1;
 498        setup_git_directory();
 499
 500        for (i = 1; i < argc; i++) {
 501                const char *arg = argv[i];
 502
 503                if (!strcmp(arg, "--unreachable")) {
 504                        show_unreachable = 1;
 505                        continue;
 506                }
 507                if (!strcmp(arg, "--tags")) {
 508                        show_tags = 1;
 509                        continue;
 510                }
 511                if (!strcmp(arg, "--root")) {
 512                        show_root = 1;
 513                        continue;
 514                }
 515                if (!strcmp(arg, "--cache")) {
 516                        keep_cache_objects = 1;
 517                        continue;
 518                }
 519                if (!strcmp(arg, "--full")) {
 520                        check_full = 1;
 521                        continue;
 522                }
 523                if (!strcmp(arg, "--strict")) {
 524                        check_strict = 1;
 525                        continue;
 526                }
 527                if (*arg == '-')
 528                        usage("git-fsck-objects [--tags] [--root] [[--unreachable] [--cache] [--full] [--strict] <head-sha1>*]");
 529        }
 530
 531        fsck_head_link();
 532        fsck_object_dir(get_object_directory());
 533        if (check_full) {
 534                struct alternate_object_database *alt;
 535                struct packed_git *p;
 536                prepare_alt_odb();
 537                for (alt = alt_odb_list; alt; alt = alt->next) {
 538                        char namebuf[PATH_MAX];
 539                        int namelen = alt->name - alt->base;
 540                        memcpy(namebuf, alt->base, namelen);
 541                        namebuf[namelen - 1] = 0;
 542                        fsck_object_dir(namebuf);
 543                }
 544                prepare_packed_git();
 545                for (p = packed_git; p; p = p->next)
 546                        /* verify gives error messages itself */
 547                        verify_pack(p, 0);
 548
 549                for (p = packed_git; p; p = p->next) {
 550                        int num = num_packed_objects(p);
 551                        for (i = 0; i < num; i++) {
 552                                unsigned char sha1[20];
 553                                nth_packed_object_sha1(p, i, sha1);
 554                                fsck_sha1(sha1);
 555                        }
 556                }
 557        }
 558
 559        heads = 0;
 560        for (i = 1; i < argc; i++) {
 561                const char *arg = argv[i]; 
 562
 563                if (*arg == '-')
 564                        continue;
 565
 566                if (!get_sha1(arg, head_sha1)) {
 567                        struct object *obj = lookup_object(head_sha1);
 568
 569                        /* Error is printed by lookup_object(). */
 570                        if (!obj)
 571                                continue;
 572
 573                        obj->used = 1;
 574                        mark_reachable(obj, REACHABLE);
 575                        heads++;
 576                        continue;
 577                }
 578                error("invalid parameter: expected sha1, got '%s'", arg);
 579        }
 580
 581        /*
 582         * If we've not been given any explicit head information, do the
 583         * default ones from .git/refs. We also consider the index file
 584         * in this case (ie this implies --cache).
 585         */
 586        if (!heads) {
 587                get_default_heads();
 588                keep_cache_objects = 1;
 589        }
 590
 591        if (keep_cache_objects) {
 592                int i;
 593                read_cache();
 594                for (i = 0; i < active_nr; i++) {
 595                        struct blob *blob = lookup_blob(active_cache[i]->sha1);
 596                        struct object *obj;
 597                        if (!blob)
 598                                continue;
 599                        obj = &blob->object;
 600                        obj->used = 1;
 601                        mark_reachable(obj, REACHABLE);
 602                }
 603                if (active_cache_tree)
 604                        fsck_cache_tree(active_cache_tree);
 605        }
 606
 607        check_connectivity();
 608        return 0;
 609}