/* builtin-fsck.c on commit "git-svn: reintroduce using a single get_log() to fetch" (fbcc173) */
   1#include "cache.h"
   2#include "commit.h"
   3#include "tree.h"
   4#include "blob.h"
   5#include "tag.h"
   6#include "refs.h"
   7#include "pack.h"
   8#include "cache-tree.h"
   9#include "tree-walk.h"
  10
/*
 * Bits kept in object->flags by this file:
 * REACHABLE is passed to mark_reachable() for every head we start from
 * and is tested by check_object(); SEEN is set by fsck_sha1() so that an
 * object is only checked once.
 */
#define REACHABLE 0x0001
#define SEEN      0x0002
  13
/* Command line switches, set in cmd_fsck(). */
static int show_root;		/* --root: print "root <sha1>" for parentless commits */
static int show_tags;		/* --tags: print a "tagged ..." line for every tag */
static int show_unreachable;	/* --unreachable: list every unreachable object */
static int check_full;		/* --full: also check alternates and packs */
static int check_strict;	/* --strict: flag S_IFREG|0664 entries as bad modes */
static int keep_cache_objects;	/* --cache: treat index entries as heads */
/* Scratch buffer for the sha1 of each head named on the command line. */
static unsigned char head_sha1[20];
  21
/*
 * Loose objects are queued together with a sort hint taken from their
 * directory entry. When NO_D_INO_IN_DIRENT is defined the hint is always
 * 0 and fsck_sha1_list() skips sorting; otherwise the hint is d_ino.
 * (Presumably the macro is set for platforms whose struct dirent has no
 * d_ino member - confirm against the build configuration.)
 */
#ifdef NO_D_INO_IN_DIRENT
#define SORT_DIRENT 0
#define DIRENT_SORT_HINT(de) 0
#else
#define SORT_DIRENT 1
#define DIRENT_SORT_HINT(de) ((de)->d_ino)
#endif
  29
  30static void objreport(struct object *obj, const char *severity,
  31                      const char *err, va_list params)
  32{
  33        fprintf(stderr, "%s in %s %s: ",
  34                severity, typename(obj->type), sha1_to_hex(obj->sha1));
  35        vfprintf(stderr, err, params);
  36        fputs("\n", stderr);
  37}
  38
  39static int objerror(struct object *obj, const char *err, ...)
  40{
  41        va_list params;
  42        va_start(params, err);
  43        objreport(obj, "error", err, params);
  44        va_end(params);
  45        return -1;
  46}
  47
  48static int objwarning(struct object *obj, const char *err, ...)
  49{
  50        va_list params;
  51        va_start(params, err);
  52        objreport(obj, "warning", err, params);
  53        va_end(params);
  54        return -1;
  55}
  56
  57/*
  58 * Check a single reachable object
  59 */
  60static void check_reachable_object(struct object *obj)
  61{
  62        const struct object_refs *refs;
  63
  64        /*
  65         * We obviously want the object to be parsed,
  66         * except if it was in a pack-file and we didn't
  67         * do a full fsck
  68         */
  69        if (!obj->parsed) {
  70                if (has_sha1_file(obj->sha1))
  71                        return; /* it is in pack - forget about it */
  72                printf("missing %s %s\n", typename(obj->type), sha1_to_hex(obj->sha1));
  73                return;
  74        }
  75
  76        /*
  77         * Check that everything that we try to reference is also good.
  78         */
  79        refs = lookup_object_refs(obj);
  80        if (refs) {
  81                unsigned j;
  82                for (j = 0; j < refs->count; j++) {
  83                        struct object *ref = refs->ref[j];
  84                        if (ref->parsed ||
  85                            (has_sha1_file(ref->sha1)))
  86                                continue;
  87                        printf("broken link from %7s %s\n",
  88                               typename(obj->type), sha1_to_hex(obj->sha1));
  89                        printf("              to %7s %s\n",
  90                               typename(ref->type), sha1_to_hex(ref->sha1));
  91                }
  92        }
  93}
  94
  95/*
  96 * Check a single unreachable object
  97 */
  98static void check_unreachable_object(struct object *obj)
  99{
 100        /*
 101         * Missing unreachable object? Ignore it. It's not like
 102         * we miss it (since it can't be reached), nor do we want
 103         * to complain about it being unreachable (since it does
 104         * not exist).
 105         */
 106        if (!obj->parsed)
 107                return;
 108
 109        /*
 110         * Unreachable object that exists? Show it if asked to,
 111         * since this is something that is prunable.
 112         */
 113        if (show_unreachable) {
 114                printf("unreachable %s %s\n", typename(obj->type), sha1_to_hex(obj->sha1));
 115                return;
 116        }
 117
 118        /*
 119         * "!used" means that nothing at all points to it, including
 120         * other unreachable objects. In other words, it's the "tip"
 121         * of some set of unreachable objects, usually a commit that
 122         * got dropped.
 123         *
 124         * Such starting points are more interesting than some random
 125         * set of unreachable objects, so we show them even if the user
 126         * hasn't asked for _all_ unreachable objects. If you have
 127         * deleted a branch by mistake, this is a prime candidate to
 128         * start looking at, for example.
 129         */
 130        if (!obj->used) {
 131                printf("dangling %s %s\n", typename(obj->type),
 132                       sha1_to_hex(obj->sha1));
 133                return;
 134        }
 135
 136        /*
 137         * Otherwise? It's there, it's unreachable, and some other unreachable
 138         * object points to it. Ignore it - it's not interesting, and we showed
 139         * all the interesting cases above.
 140         */
 141}
 142
 143static void check_object(struct object *obj)
 144{
 145        if (obj->flags & REACHABLE)
 146                check_reachable_object(obj);
 147        else
 148                check_unreachable_object(obj);
 149}
 150
 151static void check_connectivity(void)
 152{
 153        int i, max;
 154
 155        /* Look up all the requirements, warn about missing objects.. */
 156        max = get_max_object_index();
 157        for (i = 0; i < max; i++) {
 158                struct object *obj = get_indexed_object(i);
 159
 160                if (obj)
 161                        check_object(obj);
 162        }
 163}
 164
 165/*
 166 * The entries in a tree are ordered in the _path_ order,
 167 * which means that a directory entry is ordered by adding
 168 * a slash to the end of it.
 169 *
 170 * So a directory called "a" is ordered _after_ a file
 171 * called "a.c", because "a/" sorts after "a.c".
 172 */
 173#define TREE_UNORDERED (-1)
 174#define TREE_HAS_DUPS  (-2)
 175
 176static int verify_ordered(unsigned mode1, const char *name1, unsigned mode2, const char *name2)
 177{
 178        int len1 = strlen(name1);
 179        int len2 = strlen(name2);
 180        int len = len1 < len2 ? len1 : len2;
 181        unsigned char c1, c2;
 182        int cmp;
 183
 184        cmp = memcmp(name1, name2, len);
 185        if (cmp < 0)
 186                return 0;
 187        if (cmp > 0)
 188                return TREE_UNORDERED;
 189
 190        /*
 191         * Ok, the first <len> characters are the same.
 192         * Now we need to order the next one, but turn
 193         * a '\0' into a '/' for a directory entry.
 194         */
 195        c1 = name1[len];
 196        c2 = name2[len];
 197        if (!c1 && !c2)
 198                /*
 199                 * git-write-tree used to write out a nonsense tree that has
 200                 * entries with the same name, one blob and one tree.  Make
 201                 * sure we do not have duplicate entries.
 202                 */
 203                return TREE_HAS_DUPS;
 204        if (!c1 && S_ISDIR(mode1))
 205                c1 = '/';
 206        if (!c2 && S_ISDIR(mode2))
 207                c2 = '/';
 208        return c1 < c2 ? 0 : TREE_UNORDERED;
 209}
 210
 211static int fsck_tree(struct tree *item)
 212{
 213        int retval;
 214        int has_full_path = 0;
 215        int has_zero_pad = 0;
 216        int has_bad_modes = 0;
 217        int has_dup_entries = 0;
 218        int not_properly_sorted = 0;
 219        struct tree_desc desc;
 220        unsigned o_mode;
 221        const char *o_name;
 222        const unsigned char *o_sha1;
 223
 224        desc.buf = item->buffer;
 225        desc.size = item->size;
 226
 227        o_mode = 0;
 228        o_name = NULL;
 229        o_sha1 = NULL;
 230        while (desc.size) {
 231                unsigned mode;
 232                const char *name;
 233                const unsigned char *sha1;
 234
 235                sha1 = tree_entry_extract(&desc, &name, &mode);
 236
 237                if (strchr(name, '/'))
 238                        has_full_path = 1;
 239                has_zero_pad |= *(char *)desc.buf == '0';
 240                update_tree_entry(&desc);
 241
 242                switch (mode) {
 243                /*
 244                 * Standard modes..
 245                 */
 246                case S_IFREG | 0755:
 247                case S_IFREG | 0644:
 248                case S_IFLNK:
 249                case S_IFDIR:
 250                        break;
 251                /*
 252                 * This is nonstandard, but we had a few of these
 253                 * early on when we honored the full set of mode
 254                 * bits..
 255                 */
 256                case S_IFREG | 0664:
 257                        if (!check_strict)
 258                                break;
 259                default:
 260                        has_bad_modes = 1;
 261                }
 262
 263                if (o_name) {
 264                        switch (verify_ordered(o_mode, o_name, mode, name)) {
 265                        case TREE_UNORDERED:
 266                                not_properly_sorted = 1;
 267                                break;
 268                        case TREE_HAS_DUPS:
 269                                has_dup_entries = 1;
 270                                break;
 271                        default:
 272                                break;
 273                        }
 274                }
 275
 276                o_mode = mode;
 277                o_name = name;
 278                o_sha1 = sha1;
 279        }
 280        free(item->buffer);
 281        item->buffer = NULL;
 282
 283        retval = 0;
 284        if (has_full_path) {
 285                objwarning(&item->object, "contains full pathnames");
 286        }
 287        if (has_zero_pad) {
 288                objwarning(&item->object, "contains zero-padded file modes");
 289        }
 290        if (has_bad_modes) {
 291                objwarning(&item->object, "contains bad file modes");
 292        }
 293        if (has_dup_entries) {
 294                retval = objerror(&item->object, "contains duplicate file entries");
 295        }
 296        if (not_properly_sorted) {
 297                retval = objerror(&item->object, "not properly sorted");
 298        }
 299        return retval;
 300}
 301
 302static int fsck_commit(struct commit *commit)
 303{
 304        char *buffer = commit->buffer;
 305        unsigned char tree_sha1[20], sha1[20];
 306
 307        if (memcmp(buffer, "tree ", 5))
 308                return objerror(&commit->object, "invalid format - expected 'tree' line");
 309        if (get_sha1_hex(buffer+5, tree_sha1) || buffer[45] != '\n')
 310                return objerror(&commit->object, "invalid 'tree' line format - bad sha1");
 311        buffer += 46;
 312        while (!memcmp(buffer, "parent ", 7)) {
 313                if (get_sha1_hex(buffer+7, sha1) || buffer[47] != '\n')
 314                        return objerror(&commit->object, "invalid 'parent' line format - bad sha1");
 315                buffer += 48;
 316        }
 317        if (memcmp(buffer, "author ", 7))
 318                return objerror(&commit->object, "invalid format - expected 'author' line");
 319        free(commit->buffer);
 320        commit->buffer = NULL;
 321        if (!commit->tree)
 322                return objerror(&commit->object, "could not load commit's tree %s", tree_sha1);
 323        if (!commit->parents && show_root)
 324                printf("root %s\n", sha1_to_hex(commit->object.sha1));
 325        if (!commit->date)
 326                printf("bad commit date in %s\n", 
 327                       sha1_to_hex(commit->object.sha1));
 328        return 0;
 329}
 330
 331static int fsck_tag(struct tag *tag)
 332{
 333        struct object *tagged = tag->tagged;
 334
 335        if (!tagged) {
 336                return objerror(&tag->object, "could not load tagged object");
 337        }
 338        if (!show_tags)
 339                return 0;
 340
 341        printf("tagged %s %s", typename(tagged->type), sha1_to_hex(tagged->sha1));
 342        printf(" (%s) in %s\n", tag->tag, sha1_to_hex(tag->object.sha1));
 343        return 0;
 344}
 345
 346static int fsck_sha1(unsigned char *sha1)
 347{
 348        struct object *obj = parse_object(sha1);
 349        if (!obj)
 350                return error("%s: object corrupt or missing", sha1_to_hex(sha1));
 351        if (obj->flags & SEEN)
 352                return 0;
 353        obj->flags |= SEEN;
 354        if (obj->type == OBJ_BLOB)
 355                return 0;
 356        if (obj->type == OBJ_TREE)
 357                return fsck_tree((struct tree *) obj);
 358        if (obj->type == OBJ_COMMIT)
 359                return fsck_commit((struct commit *) obj);
 360        if (obj->type == OBJ_TAG)
 361                return fsck_tag((struct tag *) obj);
 362        /* By now, parse_object() would've returned NULL instead. */
 363        return objerror(obj, "unknown type '%d' (internal fsck error)", obj->type);
 364}
 365
 366/*
 367 * This is the sorting chunk size: make it reasonably
 368 * big so that we can sort well..
 369 */
 370#define MAX_SHA1_ENTRIES (1024)
 371
 372struct sha1_entry {
 373        unsigned long ino;
 374        unsigned char sha1[20];
 375};
 376
 377static struct {
 378        unsigned long nr;
 379        struct sha1_entry *entry[MAX_SHA1_ENTRIES];
 380} sha1_list;
 381
 382static int ino_compare(const void *_a, const void *_b)
 383{
 384        const struct sha1_entry *a = _a, *b = _b;
 385        unsigned long ino1 = a->ino, ino2 = b->ino;
 386        return ino1 < ino2 ? -1 : ino1 > ino2 ? 1 : 0;
 387}
 388
 389static void fsck_sha1_list(void)
 390{
 391        int i, nr = sha1_list.nr;
 392
 393        if (SORT_DIRENT)
 394                qsort(sha1_list.entry, nr,
 395                      sizeof(struct sha1_entry *), ino_compare);
 396        for (i = 0; i < nr; i++) {
 397                struct sha1_entry *entry = sha1_list.entry[i];
 398                unsigned char *sha1 = entry->sha1;
 399
 400                sha1_list.entry[i] = NULL;
 401                fsck_sha1(sha1);
 402                free(entry);
 403        }
 404        sha1_list.nr = 0;
 405}
 406
 407static void add_sha1_list(unsigned char *sha1, unsigned long ino)
 408{
 409        struct sha1_entry *entry = xmalloc(sizeof(*entry));
 410        int nr;
 411
 412        entry->ino = ino;
 413        hashcpy(entry->sha1, sha1);
 414        nr = sha1_list.nr;
 415        if (nr == MAX_SHA1_ENTRIES) {
 416                fsck_sha1_list();
 417                nr = 0;
 418        }
 419        sha1_list.entry[nr] = entry;
 420        sha1_list.nr = ++nr;
 421}
 422
 423static void fsck_dir(int i, char *path)
 424{
 425        DIR *dir = opendir(path);
 426        struct dirent *de;
 427
 428        if (!dir)
 429                return;
 430
 431        while ((de = readdir(dir)) != NULL) {
 432                char name[100];
 433                unsigned char sha1[20];
 434                int len = strlen(de->d_name);
 435
 436                switch (len) {
 437                case 2:
 438                        if (de->d_name[1] != '.')
 439                                break;
 440                case 1:
 441                        if (de->d_name[0] != '.')
 442                                break;
 443                        continue;
 444                case 38:
 445                        sprintf(name, "%02x", i);
 446                        memcpy(name+2, de->d_name, len+1);
 447                        if (get_sha1_hex(name, sha1) < 0)
 448                                break;
 449                        add_sha1_list(sha1, DIRENT_SORT_HINT(de));
 450                        continue;
 451                }
 452                fprintf(stderr, "bad sha1 file: %s/%s\n", path, de->d_name);
 453        }
 454        closedir(dir);
 455}
 456
/*
 * Count of refs seen by fsck_handle_ref() whose target was found,
 * either as a loaded object or through has_sha1_file().
 * get_default_heads() warns when this is still zero.
 */
static int default_refs;
 458
 459static int fsck_handle_reflog_ent(unsigned char *osha1, unsigned char *nsha1,
 460                const char *email, unsigned long timestamp, int tz,
 461                const char *message, void *cb_data)
 462{
 463        struct object *obj;
 464
 465        if (!is_null_sha1(osha1)) {
 466                obj = lookup_object(osha1);
 467                if (obj) {
 468                        obj->used = 1;
 469                        mark_reachable(obj, REACHABLE);
 470                }
 471        }
 472        obj = lookup_object(nsha1);
 473        if (obj) {
 474                obj->used = 1;
 475                mark_reachable(obj, REACHABLE);
 476        }
 477        return 0;
 478}
 479
 480static int fsck_handle_reflog(const char *logname, const unsigned char *sha1, int flag, void *cb_data)
 481{
 482        for_each_reflog_ent(logname, fsck_handle_reflog_ent, NULL);
 483        return 0;
 484}
 485
 486static int fsck_handle_ref(const char *refname, const unsigned char *sha1, int flag, void *cb_data)
 487{
 488        struct object *obj;
 489
 490        obj = lookup_object(sha1);
 491        if (!obj) {
 492                if (has_sha1_file(sha1)) {
 493                        default_refs++;
 494                        return 0; /* it is in a pack */
 495                }
 496                error("%s: invalid sha1 pointer %s", refname, sha1_to_hex(sha1));
 497                /* We'll continue with the rest despite the error.. */
 498                return 0;
 499        }
 500        default_refs++;
 501        obj->used = 1;
 502        mark_reachable(obj, REACHABLE);
 503
 504        return 0;
 505}
 506
 507static void get_default_heads(void)
 508{
 509        for_each_ref(fsck_handle_ref, NULL);
 510        for_each_reflog(fsck_handle_reflog, NULL);
 511
 512        /*
 513         * Not having any default heads isn't really fatal, but
 514         * it does mean that "--unreachable" no longer makes any
 515         * sense (since in this case everything will obviously
 516         * be unreachable by definition.
 517         *
 518         * Showing dangling objects is valid, though (as those
 519         * dangling objects are likely lost heads).
 520         *
 521         * So we just print a warning about it, and clear the
 522         * "show_unreachable" flag.
 523         */
 524        if (!default_refs) {
 525                error("No default references");
 526                show_unreachable = 0;
 527        }
 528}
 529
 530static void fsck_object_dir(const char *path)
 531{
 532        int i;
 533        for (i = 0; i < 256; i++) {
 534                static char dir[4096];
 535                sprintf(dir, "%s/%02x", path, i);
 536                fsck_dir(i, dir);
 537        }
 538        fsck_sha1_list();
 539}
 540
 541static int fsck_head_link(void)
 542{
 543        unsigned char sha1[20];
 544        int flag;
 545        const char *head_points_at = resolve_ref("HEAD", sha1, 1, &flag);
 546
 547        if (!head_points_at || !(flag & REF_ISSYMREF))
 548                return error("HEAD is not a symbolic ref");
 549        if (prefixcmp(head_points_at, "refs/heads/"))
 550                return error("HEAD points to something strange (%s)",
 551                             head_points_at);
 552        if (is_null_sha1(sha1))
 553                return error("HEAD: not a valid git pointer");
 554        return 0;
 555}
 556
 557static int fsck_cache_tree(struct cache_tree *it)
 558{
 559        int i;
 560        int err = 0;
 561
 562        if (0 <= it->entry_count) {
 563                struct object *obj = parse_object(it->sha1);
 564                if (!obj) {
 565                        error("%s: invalid sha1 pointer in cache-tree",
 566                              sha1_to_hex(it->sha1));
 567                        return 1;
 568                }
 569                mark_reachable(obj, REACHABLE);
 570                obj->used = 1;
 571                if (obj->type != OBJ_TREE)
 572                        err |= objerror(obj, "non-tree in cache-tree");
 573        }
 574        for (i = 0; i < it->subtree_nr; i++)
 575                err |= fsck_cache_tree(it->down[i]->cache_tree);
 576        return err;
 577}
 578
 579int cmd_fsck(int argc, char **argv, const char *prefix)
 580{
 581        int i, heads;
 582
 583        track_object_refs = 1;
 584
 585        for (i = 1; i < argc; i++) {
 586                const char *arg = argv[i];
 587
 588                if (!strcmp(arg, "--unreachable")) {
 589                        show_unreachable = 1;
 590                        continue;
 591                }
 592                if (!strcmp(arg, "--tags")) {
 593                        show_tags = 1;
 594                        continue;
 595                }
 596                if (!strcmp(arg, "--root")) {
 597                        show_root = 1;
 598                        continue;
 599                }
 600                if (!strcmp(arg, "--cache")) {
 601                        keep_cache_objects = 1;
 602                        continue;
 603                }
 604                if (!strcmp(arg, "--full")) {
 605                        check_full = 1;
 606                        continue;
 607                }
 608                if (!strcmp(arg, "--strict")) {
 609                        check_strict = 1;
 610                        continue;
 611                }
 612                if (*arg == '-')
 613                        usage("git-fsck [--tags] [--root] [[--unreachable] [--cache] [--full] [--strict] <head-sha1>*]");
 614        }
 615
 616        fsck_head_link();
 617        fsck_object_dir(get_object_directory());
 618        if (check_full) {
 619                struct alternate_object_database *alt;
 620                struct packed_git *p;
 621                prepare_alt_odb();
 622                for (alt = alt_odb_list; alt; alt = alt->next) {
 623                        char namebuf[PATH_MAX];
 624                        int namelen = alt->name - alt->base;
 625                        memcpy(namebuf, alt->base, namelen);
 626                        namebuf[namelen - 1] = 0;
 627                        fsck_object_dir(namebuf);
 628                }
 629                prepare_packed_git();
 630                for (p = packed_git; p; p = p->next)
 631                        /* verify gives error messages itself */
 632                        verify_pack(p, 0);
 633
 634                for (p = packed_git; p; p = p->next) {
 635                        int num = num_packed_objects(p);
 636                        for (i = 0; i < num; i++) {
 637                                unsigned char sha1[20];
 638                                nth_packed_object_sha1(p, i, sha1);
 639                                fsck_sha1(sha1);
 640                        }
 641                }
 642        }
 643
 644        heads = 0;
 645        for (i = 1; i < argc; i++) {
 646                const char *arg = argv[i]; 
 647
 648                if (*arg == '-')
 649                        continue;
 650
 651                if (!get_sha1(arg, head_sha1)) {
 652                        struct object *obj = lookup_object(head_sha1);
 653
 654                        /* Error is printed by lookup_object(). */
 655                        if (!obj)
 656                                continue;
 657
 658                        obj->used = 1;
 659                        mark_reachable(obj, REACHABLE);
 660                        heads++;
 661                        continue;
 662                }
 663                error("invalid parameter: expected sha1, got '%s'", arg);
 664        }
 665
 666        /*
 667         * If we've not been given any explicit head information, do the
 668         * default ones from .git/refs. We also consider the index file
 669         * in this case (ie this implies --cache).
 670         */
 671        if (!heads) {
 672                get_default_heads();
 673                keep_cache_objects = 1;
 674        }
 675
 676        if (keep_cache_objects) {
 677                int i;
 678                read_cache();
 679                for (i = 0; i < active_nr; i++) {
 680                        struct blob *blob = lookup_blob(active_cache[i]->sha1);
 681                        struct object *obj;
 682                        if (!blob)
 683                                continue;
 684                        obj = &blob->object;
 685                        obj->used = 1;
 686                        mark_reachable(obj, REACHABLE);
 687                }
 688                if (active_cache_tree)
 689                        fsck_cache_tree(active_cache_tree);
 690        }
 691
 692        check_connectivity();
 693        return 0;
 694}