/* builtin-fsck.c, as of commit "grep: teach --untracked and --exclude-standard options" (0a93fb8) */
   1#include "builtin.h"
   2#include "cache.h"
   3#include "commit.h"
   4#include "tree.h"
   5#include "blob.h"
   6#include "tag.h"
   7#include "refs.h"
   8#include "pack.h"
   9#include "cache-tree.h"
  10#include "tree-walk.h"
  11#include "fsck.h"
  12#include "parse-options.h"
  13#include "dir.h"
  14
  15#define REACHABLE 0x0001
  16#define SEEN      0x0002
  17
  18static int show_root;
  19static int show_tags;
  20static int show_unreachable;
  21static int include_reflogs = 1;
  22static int check_full = 1;
  23static int check_strict;
  24static int keep_cache_objects;
  25static unsigned char head_sha1[20];
  26static const char *head_points_at;
  27static int errors_found;
  28static int write_lost_and_found;
  29static int verbose;
  30#define ERROR_OBJECT 01
  31#define ERROR_REACHABLE 02
  32
  33#ifdef NO_D_INO_IN_DIRENT
  34#define SORT_DIRENT 0
  35#define DIRENT_SORT_HINT(de) 0
  36#else
  37#define SORT_DIRENT 1
  38#define DIRENT_SORT_HINT(de) ((de)->d_ino)
  39#endif
  40
  41static void objreport(struct object *obj, const char *severity,
  42                      const char *err, va_list params)
  43{
  44        fprintf(stderr, "%s in %s %s: ",
  45                severity, typename(obj->type), sha1_to_hex(obj->sha1));
  46        vfprintf(stderr, err, params);
  47        fputs("\n", stderr);
  48}
  49
  50__attribute__((format (printf, 2, 3)))
  51static int objerror(struct object *obj, const char *err, ...)
  52{
  53        va_list params;
  54        va_start(params, err);
  55        errors_found |= ERROR_OBJECT;
  56        objreport(obj, "error", err, params);
  57        va_end(params);
  58        return -1;
  59}
  60
  61__attribute__((format (printf, 3, 4)))
  62static int fsck_error_func(struct object *obj, int type, const char *err, ...)
  63{
  64        va_list params;
  65        va_start(params, err);
  66        objreport(obj, (type == FSCK_WARN) ? "warning" : "error", err, params);
  67        va_end(params);
  68        return (type == FSCK_WARN) ? 0 : 1;
  69}
  70
  71static struct object_array pending;
  72
  73static int mark_object(struct object *obj, int type, void *data)
  74{
  75        struct object *parent = data;
  76
  77        if (!obj) {
  78                printf("broken link from %7s %s\n",
  79                           typename(parent->type), sha1_to_hex(parent->sha1));
  80                printf("broken link from %7s %s\n",
  81                           (type == OBJ_ANY ? "unknown" : typename(type)), "unknown");
  82                errors_found |= ERROR_REACHABLE;
  83                return 1;
  84        }
  85
  86        if (type != OBJ_ANY && obj->type != type)
  87                objerror(parent, "wrong object type in link");
  88
  89        if (obj->flags & REACHABLE)
  90                return 0;
  91        obj->flags |= REACHABLE;
  92        if (!obj->parsed) {
  93                if (parent && !has_sha1_file(obj->sha1)) {
  94                        printf("broken link from %7s %s\n",
  95                                 typename(parent->type), sha1_to_hex(parent->sha1));
  96                        printf("              to %7s %s\n",
  97                                 typename(obj->type), sha1_to_hex(obj->sha1));
  98                        errors_found |= ERROR_REACHABLE;
  99                }
 100                return 1;
 101        }
 102
 103        add_object_array(obj, (void *) parent, &pending);
 104        return 0;
 105}
 106
 107static void mark_object_reachable(struct object *obj)
 108{
 109        mark_object(obj, OBJ_ANY, NULL);
 110}
 111
 112static int traverse_one_object(struct object *obj, struct object *parent)
 113{
 114        int result;
 115        struct tree *tree = NULL;
 116
 117        if (obj->type == OBJ_TREE) {
 118                obj->parsed = 0;
 119                tree = (struct tree *)obj;
 120                if (parse_tree(tree) < 0)
 121                        return 1; /* error already displayed */
 122        }
 123        result = fsck_walk(obj, mark_object, obj);
 124        if (tree) {
 125                free(tree->buffer);
 126                tree->buffer = NULL;
 127        }
 128        return result;
 129}
 130
 131static int traverse_reachable(void)
 132{
 133        int result = 0;
 134        while (pending.nr) {
 135                struct object_array_entry *entry;
 136                struct object *obj, *parent;
 137
 138                entry = pending.objects + --pending.nr;
 139                obj = entry->item;
 140                parent = (struct object *) entry->name;
 141                result |= traverse_one_object(obj, parent);
 142        }
 143        return !!result;
 144}
 145
 146static int mark_used(struct object *obj, int type, void *data)
 147{
 148        if (!obj)
 149                return 1;
 150        obj->used = 1;
 151        return 0;
 152}
 153
 154/*
 155 * Check a single reachable object
 156 */
 157static void check_reachable_object(struct object *obj)
 158{
 159        /*
 160         * We obviously want the object to be parsed,
 161         * except if it was in a pack-file and we didn't
 162         * do a full fsck
 163         */
 164        if (!obj->parsed) {
 165                if (has_sha1_pack(obj->sha1))
 166                        return; /* it is in pack - forget about it */
 167                printf("missing %s %s\n", typename(obj->type), sha1_to_hex(obj->sha1));
 168                errors_found |= ERROR_REACHABLE;
 169                return;
 170        }
 171}
 172
 173/*
 174 * Check a single unreachable object
 175 */
 176static void check_unreachable_object(struct object *obj)
 177{
 178        /*
 179         * Missing unreachable object? Ignore it. It's not like
 180         * we miss it (since it can't be reached), nor do we want
 181         * to complain about it being unreachable (since it does
 182         * not exist).
 183         */
 184        if (!obj->parsed)
 185                return;
 186
 187        /*
 188         * Unreachable object that exists? Show it if asked to,
 189         * since this is something that is prunable.
 190         */
 191        if (show_unreachable) {
 192                printf("unreachable %s %s\n", typename(obj->type), sha1_to_hex(obj->sha1));
 193                return;
 194        }
 195
 196        /*
 197         * "!used" means that nothing at all points to it, including
 198         * other unreachable objects. In other words, it's the "tip"
 199         * of some set of unreachable objects, usually a commit that
 200         * got dropped.
 201         *
 202         * Such starting points are more interesting than some random
 203         * set of unreachable objects, so we show them even if the user
 204         * hasn't asked for _all_ unreachable objects. If you have
 205         * deleted a branch by mistake, this is a prime candidate to
 206         * start looking at, for example.
 207         */
 208        if (!obj->used) {
 209                printf("dangling %s %s\n", typename(obj->type),
 210                       sha1_to_hex(obj->sha1));
 211                if (write_lost_and_found) {
 212                        char *filename = git_path("lost-found/%s/%s",
 213                                obj->type == OBJ_COMMIT ? "commit" : "other",
 214                                sha1_to_hex(obj->sha1));
 215                        FILE *f;
 216
 217                        if (safe_create_leading_directories(filename)) {
 218                                error("Could not create lost-found");
 219                                return;
 220                        }
 221                        if (!(f = fopen(filename, "w")))
 222                                die_errno("Could not open '%s'", filename);
 223                        if (obj->type == OBJ_BLOB) {
 224                                enum object_type type;
 225                                unsigned long size;
 226                                char *buf = read_sha1_file(obj->sha1,
 227                                                &type, &size);
 228                                if (buf) {
 229                                        if (fwrite(buf, size, 1, f) != 1)
 230                                                die_errno("Could not write '%s'",
 231                                                          filename);
 232                                        free(buf);
 233                                }
 234                        } else
 235                                fprintf(f, "%s\n", sha1_to_hex(obj->sha1));
 236                        if (fclose(f))
 237                                die_errno("Could not finish '%s'",
 238                                          filename);
 239                }
 240                return;
 241        }
 242
 243        /*
 244         * Otherwise? It's there, it's unreachable, and some other unreachable
 245         * object points to it. Ignore it - it's not interesting, and we showed
 246         * all the interesting cases above.
 247         */
 248}
 249
 250static void check_object(struct object *obj)
 251{
 252        if (verbose)
 253                fprintf(stderr, "Checking %s\n", sha1_to_hex(obj->sha1));
 254
 255        if (obj->flags & REACHABLE)
 256                check_reachable_object(obj);
 257        else
 258                check_unreachable_object(obj);
 259}
 260
 261static void check_connectivity(void)
 262{
 263        int i, max;
 264
 265        /* Traverse the pending reachable objects */
 266        traverse_reachable();
 267
 268        /* Look up all the requirements, warn about missing objects.. */
 269        max = get_max_object_index();
 270        if (verbose)
 271                fprintf(stderr, "Checking connectivity (%d objects)\n", max);
 272
 273        for (i = 0; i < max; i++) {
 274                struct object *obj = get_indexed_object(i);
 275
 276                if (obj)
 277                        check_object(obj);
 278        }
 279}
 280
 281static int fsck_sha1(const unsigned char *sha1)
 282{
 283        struct object *obj = parse_object(sha1);
 284        if (!obj) {
 285                errors_found |= ERROR_OBJECT;
 286                return error("%s: object corrupt or missing",
 287                             sha1_to_hex(sha1));
 288        }
 289        if (obj->flags & SEEN)
 290                return 0;
 291        obj->flags |= SEEN;
 292
 293        if (verbose)
 294                fprintf(stderr, "Checking %s %s\n",
 295                        typename(obj->type), sha1_to_hex(obj->sha1));
 296
 297        if (fsck_walk(obj, mark_used, NULL))
 298                objerror(obj, "broken links");
 299        if (fsck_object(obj, check_strict, fsck_error_func))
 300                return -1;
 301
 302        if (obj->type == OBJ_TREE) {
 303                struct tree *item = (struct tree *) obj;
 304
 305                free(item->buffer);
 306                item->buffer = NULL;
 307        }
 308
 309        if (obj->type == OBJ_COMMIT) {
 310                struct commit *commit = (struct commit *) obj;
 311
 312                free(commit->buffer);
 313                commit->buffer = NULL;
 314
 315                if (!commit->parents && show_root)
 316                        printf("root %s\n", sha1_to_hex(commit->object.sha1));
 317        }
 318
 319        if (obj->type == OBJ_TAG) {
 320                struct tag *tag = (struct tag *) obj;
 321
 322                if (show_tags && tag->tagged) {
 323                        printf("tagged %s %s", typename(tag->tagged->type), sha1_to_hex(tag->tagged->sha1));
 324                        printf(" (%s) in %s\n", tag->tag, sha1_to_hex(tag->object.sha1));
 325                }
 326        }
 327
 328        return 0;
 329}
 330
 331/*
 332 * This is the sorting chunk size: make it reasonably
 333 * big so that we can sort well..
 334 */
 335#define MAX_SHA1_ENTRIES (1024)
 336
 337struct sha1_entry {
 338        unsigned long ino;
 339        unsigned char sha1[20];
 340};
 341
 342static struct {
 343        unsigned long nr;
 344        struct sha1_entry *entry[MAX_SHA1_ENTRIES];
 345} sha1_list;
 346
 347static int ino_compare(const void *_a, const void *_b)
 348{
 349        const struct sha1_entry *a = _a, *b = _b;
 350        unsigned long ino1 = a->ino, ino2 = b->ino;
 351        return ino1 < ino2 ? -1 : ino1 > ino2 ? 1 : 0;
 352}
 353
 354static void fsck_sha1_list(void)
 355{
 356        int i, nr = sha1_list.nr;
 357
 358        if (SORT_DIRENT)
 359                qsort(sha1_list.entry, nr,
 360                      sizeof(struct sha1_entry *), ino_compare);
 361        for (i = 0; i < nr; i++) {
 362                struct sha1_entry *entry = sha1_list.entry[i];
 363                unsigned char *sha1 = entry->sha1;
 364
 365                sha1_list.entry[i] = NULL;
 366                fsck_sha1(sha1);
 367                free(entry);
 368        }
 369        sha1_list.nr = 0;
 370}
 371
 372static void add_sha1_list(unsigned char *sha1, unsigned long ino)
 373{
 374        struct sha1_entry *entry = xmalloc(sizeof(*entry));
 375        int nr;
 376
 377        entry->ino = ino;
 378        hashcpy(entry->sha1, sha1);
 379        nr = sha1_list.nr;
 380        if (nr == MAX_SHA1_ENTRIES) {
 381                fsck_sha1_list();
 382                nr = 0;
 383        }
 384        sha1_list.entry[nr] = entry;
 385        sha1_list.nr = ++nr;
 386}
 387
 388static void fsck_dir(int i, char *path)
 389{
 390        DIR *dir = opendir(path);
 391        struct dirent *de;
 392
 393        if (!dir)
 394                return;
 395
 396        if (verbose)
 397                fprintf(stderr, "Checking directory %s\n", path);
 398
 399        while ((de = readdir(dir)) != NULL) {
 400                char name[100];
 401                unsigned char sha1[20];
 402
 403                if (is_dot_or_dotdot(de->d_name))
 404                        continue;
 405                if (strlen(de->d_name) == 38) {
 406                        sprintf(name, "%02x", i);
 407                        memcpy(name+2, de->d_name, 39);
 408                        if (get_sha1_hex(name, sha1) < 0)
 409                                break;
 410                        add_sha1_list(sha1, DIRENT_SORT_HINT(de));
 411                        continue;
 412                }
 413                if (!prefixcmp(de->d_name, "tmp_obj_"))
 414                        continue;
 415                fprintf(stderr, "bad sha1 file: %s/%s\n", path, de->d_name);
 416        }
 417        closedir(dir);
 418}
 419
 420static int default_refs;
 421
 422static int fsck_handle_reflog_ent(unsigned char *osha1, unsigned char *nsha1,
 423                const char *email, unsigned long timestamp, int tz,
 424                const char *message, void *cb_data)
 425{
 426        struct object *obj;
 427
 428        if (verbose)
 429                fprintf(stderr, "Checking reflog %s->%s\n",
 430                        sha1_to_hex(osha1), sha1_to_hex(nsha1));
 431
 432        if (!is_null_sha1(osha1)) {
 433                obj = lookup_object(osha1);
 434                if (obj) {
 435                        obj->used = 1;
 436                        mark_object_reachable(obj);
 437                }
 438        }
 439        obj = lookup_object(nsha1);
 440        if (obj) {
 441                obj->used = 1;
 442                mark_object_reachable(obj);
 443        }
 444        return 0;
 445}
 446
 447static int fsck_handle_reflog(const char *logname, const unsigned char *sha1, int flag, void *cb_data)
 448{
 449        for_each_reflog_ent(logname, fsck_handle_reflog_ent, NULL);
 450        return 0;
 451}
 452
 453static int is_branch(const char *refname)
 454{
 455        return !strcmp(refname, "HEAD") || !prefixcmp(refname, "refs/heads/");
 456}
 457
 458static int fsck_handle_ref(const char *refname, const unsigned char *sha1, int flag, void *cb_data)
 459{
 460        struct object *obj;
 461
 462        obj = parse_object(sha1);
 463        if (!obj) {
 464                error("%s: invalid sha1 pointer %s", refname, sha1_to_hex(sha1));
 465                /* We'll continue with the rest despite the error.. */
 466                return 0;
 467        }
 468        if (obj->type != OBJ_COMMIT && is_branch(refname))
 469                error("%s: not a commit", refname);
 470        default_refs++;
 471        obj->used = 1;
 472        mark_object_reachable(obj);
 473
 474        return 0;
 475}
 476
 477static void get_default_heads(void)
 478{
 479        if (head_points_at && !is_null_sha1(head_sha1))
 480                fsck_handle_ref("HEAD", head_sha1, 0, NULL);
 481        for_each_ref(fsck_handle_ref, NULL);
 482        if (include_reflogs)
 483                for_each_reflog(fsck_handle_reflog, NULL);
 484
 485        /*
 486         * Not having any default heads isn't really fatal, but
 487         * it does mean that "--unreachable" no longer makes any
 488         * sense (since in this case everything will obviously
 489         * be unreachable by definition.
 490         *
 491         * Showing dangling objects is valid, though (as those
 492         * dangling objects are likely lost heads).
 493         *
 494         * So we just print a warning about it, and clear the
 495         * "show_unreachable" flag.
 496         */
 497        if (!default_refs) {
 498                fprintf(stderr, "notice: No default references\n");
 499                show_unreachable = 0;
 500        }
 501}
 502
 503static void fsck_object_dir(const char *path)
 504{
 505        int i;
 506
 507        if (verbose)
 508                fprintf(stderr, "Checking object directory\n");
 509
 510        for (i = 0; i < 256; i++) {
 511                static char dir[4096];
 512                sprintf(dir, "%s/%02x", path, i);
 513                fsck_dir(i, dir);
 514        }
 515        fsck_sha1_list();
 516}
 517
 518static int fsck_head_link(void)
 519{
 520        int flag;
 521        int null_is_error = 0;
 522
 523        if (verbose)
 524                fprintf(stderr, "Checking HEAD link\n");
 525
 526        head_points_at = resolve_ref("HEAD", head_sha1, 0, &flag);
 527        if (!head_points_at)
 528                return error("Invalid HEAD");
 529        if (!strcmp(head_points_at, "HEAD"))
 530                /* detached HEAD */
 531                null_is_error = 1;
 532        else if (prefixcmp(head_points_at, "refs/heads/"))
 533                return error("HEAD points to something strange (%s)",
 534                             head_points_at);
 535        if (is_null_sha1(head_sha1)) {
 536                if (null_is_error)
 537                        return error("HEAD: detached HEAD points at nothing");
 538                fprintf(stderr, "notice: HEAD points to an unborn branch (%s)\n",
 539                        head_points_at + 11);
 540        }
 541        return 0;
 542}
 543
 544static int fsck_cache_tree(struct cache_tree *it)
 545{
 546        int i;
 547        int err = 0;
 548
 549        if (verbose)
 550                fprintf(stderr, "Checking cache tree\n");
 551
 552        if (0 <= it->entry_count) {
 553                struct object *obj = parse_object(it->sha1);
 554                if (!obj) {
 555                        error("%s: invalid sha1 pointer in cache-tree",
 556                              sha1_to_hex(it->sha1));
 557                        return 1;
 558                }
 559                mark_object_reachable(obj);
 560                obj->used = 1;
 561                if (obj->type != OBJ_TREE)
 562                        err |= objerror(obj, "non-tree in cache-tree");
 563        }
 564        for (i = 0; i < it->subtree_nr; i++)
 565                err |= fsck_cache_tree(it->down[i]->cache_tree);
 566        return err;
 567}
 568
 569static char const * const fsck_usage[] = {
 570        "git fsck [options] [<object>...]",
 571        NULL
 572};
 573
 574static struct option fsck_opts[] = {
 575        OPT__VERBOSE(&verbose),
 576        OPT_BOOLEAN(0, "unreachable", &show_unreachable, "show unreachable objects"),
 577        OPT_BOOLEAN(0, "tags", &show_tags, "report tags"),
 578        OPT_BOOLEAN(0, "root", &show_root, "report root nodes"),
 579        OPT_BOOLEAN(0, "cache", &keep_cache_objects, "make index objects head nodes"),
 580        OPT_BOOLEAN(0, "reflogs", &include_reflogs, "make reflogs head nodes (default)"),
 581        OPT_BOOLEAN(0, "full", &check_full, "also consider packs and alternate objects"),
 582        OPT_BOOLEAN(0, "strict", &check_strict, "enable more strict checking"),
 583        OPT_BOOLEAN(0, "lost-found", &write_lost_and_found,
 584                                "write dangling objects in .git/lost-found"),
 585        OPT_END(),
 586};
 587
 588int cmd_fsck(int argc, const char **argv, const char *prefix)
 589{
 590        int i, heads;
 591        struct alternate_object_database *alt;
 592
 593        errors_found = 0;
 594        read_replace_refs = 0;
 595
 596        argc = parse_options(argc, argv, prefix, fsck_opts, fsck_usage, 0);
 597        if (write_lost_and_found) {
 598                check_full = 1;
 599                include_reflogs = 0;
 600        }
 601
 602        fsck_head_link();
 603        fsck_object_dir(get_object_directory());
 604
 605        prepare_alt_odb();
 606        for (alt = alt_odb_list; alt; alt = alt->next) {
 607                char namebuf[PATH_MAX];
 608                int namelen = alt->name - alt->base;
 609                memcpy(namebuf, alt->base, namelen);
 610                namebuf[namelen - 1] = 0;
 611                fsck_object_dir(namebuf);
 612        }
 613
 614        if (check_full) {
 615                struct packed_git *p;
 616
 617                prepare_packed_git();
 618                for (p = packed_git; p; p = p->next)
 619                        /* verify gives error messages itself */
 620                        verify_pack(p);
 621
 622                for (p = packed_git; p; p = p->next) {
 623                        uint32_t j, num;
 624                        if (open_pack_index(p))
 625                                continue;
 626                        num = p->num_objects;
 627                        for (j = 0; j < num; j++)
 628                                fsck_sha1(nth_packed_object_sha1(p, j));
 629                }
 630        }
 631
 632        heads = 0;
 633        for (i = 0; i < argc; i++) {
 634                const char *arg = argv[i];
 635                unsigned char sha1[20];
 636                if (!get_sha1(arg, sha1)) {
 637                        struct object *obj = lookup_object(sha1);
 638
 639                        /* Error is printed by lookup_object(). */
 640                        if (!obj)
 641                                continue;
 642
 643                        obj->used = 1;
 644                        mark_object_reachable(obj);
 645                        heads++;
 646                        continue;
 647                }
 648                error("invalid parameter: expected sha1, got '%s'", arg);
 649        }
 650
 651        /*
 652         * If we've not been given any explicit head information, do the
 653         * default ones from .git/refs. We also consider the index file
 654         * in this case (ie this implies --cache).
 655         */
 656        if (!heads) {
 657                get_default_heads();
 658                keep_cache_objects = 1;
 659        }
 660
 661        if (keep_cache_objects) {
 662                read_cache();
 663                for (i = 0; i < active_nr; i++) {
 664                        unsigned int mode;
 665                        struct blob *blob;
 666                        struct object *obj;
 667
 668                        mode = active_cache[i]->ce_mode;
 669                        if (S_ISGITLINK(mode))
 670                                continue;
 671                        blob = lookup_blob(active_cache[i]->sha1);
 672                        if (!blob)
 673                                continue;
 674                        obj = &blob->object;
 675                        obj->used = 1;
 676                        mark_object_reachable(obj);
 677                }
 678                if (active_cache_tree)
 679                        fsck_cache_tree(active_cache_tree);
 680        }
 681
 682        check_connectivity();
 683        return errors_found;
 684}