builtin-fsck.c on commit daemon: Strictly parse the "extra arg" part of the command (73bb33a)
   1#include "builtin.h"
   2#include "cache.h"
   3#include "commit.h"
   4#include "tree.h"
   5#include "blob.h"
   6#include "tag.h"
   7#include "refs.h"
   8#include "pack.h"
   9#include "cache-tree.h"
  10#include "tree-walk.h"
  11#include "fsck.h"
  12#include "parse-options.h"
  13#include "dir.h"
  14
  15#define REACHABLE 0x0001
  16#define SEEN      0x0002
  17
  18static int show_root;
  19static int show_tags;
  20static int show_unreachable;
  21static int include_reflogs = 1;
  22static int check_full;
  23static int check_strict;
  24static int keep_cache_objects;
  25static unsigned char head_sha1[20];
  26static const char *head_points_at;
  27static int errors_found;
  28static int write_lost_and_found;
  29static int verbose;
  30#define ERROR_OBJECT 01
  31#define ERROR_REACHABLE 02
  32
  33#ifdef NO_D_INO_IN_DIRENT
  34#define SORT_DIRENT 0
  35#define DIRENT_SORT_HINT(de) 0
  36#else
  37#define SORT_DIRENT 1
  38#define DIRENT_SORT_HINT(de) ((de)->d_ino)
  39#endif
  40
  41static void objreport(struct object *obj, const char *severity,
  42                      const char *err, va_list params)
  43{
  44        fprintf(stderr, "%s in %s %s: ",
  45                severity, typename(obj->type), sha1_to_hex(obj->sha1));
  46        vfprintf(stderr, err, params);
  47        fputs("\n", stderr);
  48}
  49
  50static int objerror(struct object *obj, const char *err, ...)
  51{
  52        va_list params;
  53        va_start(params, err);
  54        errors_found |= ERROR_OBJECT;
  55        objreport(obj, "error", err, params);
  56        va_end(params);
  57        return -1;
  58}
  59
  60static int fsck_error_func(struct object *obj, int type, const char *err, ...)
  61{
  62        va_list params;
  63        va_start(params, err);
  64        objreport(obj, (type == FSCK_WARN) ? "warning" : "error", err, params);
  65        va_end(params);
  66        return (type == FSCK_WARN) ? 0 : 1;
  67}
  68
  69static struct object_array pending;
  70
  71static int mark_object(struct object *obj, int type, void *data)
  72{
  73        struct object *parent = data;
  74
  75        if (!obj) {
  76                printf("broken link from %7s %s\n",
  77                           typename(parent->type), sha1_to_hex(parent->sha1));
  78                printf("broken link from %7s %s\n",
  79                           (type == OBJ_ANY ? "unknown" : typename(type)), "unknown");
  80                errors_found |= ERROR_REACHABLE;
  81                return 1;
  82        }
  83
  84        if (type != OBJ_ANY && obj->type != type)
  85                objerror(parent, "wrong object type in link");
  86
  87        if (obj->flags & REACHABLE)
  88                return 0;
  89        obj->flags |= REACHABLE;
  90        if (!obj->parsed) {
  91                if (parent && !has_sha1_file(obj->sha1)) {
  92                        printf("broken link from %7s %s\n",
  93                                 typename(parent->type), sha1_to_hex(parent->sha1));
  94                        printf("              to %7s %s\n",
  95                                 typename(obj->type), sha1_to_hex(obj->sha1));
  96                        errors_found |= ERROR_REACHABLE;
  97                }
  98                return 1;
  99        }
 100
 101        add_object_array(obj, (void *) parent, &pending);
 102        return 0;
 103}
 104
 105static void mark_object_reachable(struct object *obj)
 106{
 107        mark_object(obj, OBJ_ANY, 0);
 108}
 109
 110static int traverse_one_object(struct object *obj, struct object *parent)
 111{
 112        int result;
 113        struct tree *tree = NULL;
 114
 115        if (obj->type == OBJ_TREE) {
 116                obj->parsed = 0;
 117                tree = (struct tree *)obj;
 118                if (parse_tree(tree) < 0)
 119                        return 1; /* error already displayed */
 120        }
 121        result = fsck_walk(obj, mark_object, obj);
 122        if (tree) {
 123                free(tree->buffer);
 124                tree->buffer = NULL;
 125        }
 126        return result;
 127}
 128
 129static int traverse_reachable(void)
 130{
 131        int result = 0;
 132        while (pending.nr) {
 133                struct object_array_entry *entry;
 134                struct object *obj, *parent;
 135
 136                entry = pending.objects + --pending.nr;
 137                obj = entry->item;
 138                parent = (struct object *) entry->name;
 139                result |= traverse_one_object(obj, parent);
 140        }
 141        return !!result;
 142}
 143
 144static int mark_used(struct object *obj, int type, void *data)
 145{
 146        if (!obj)
 147                return 1;
 148        obj->used = 1;
 149        return 0;
 150}
 151
 152/*
 153 * Check a single reachable object
 154 */
 155static void check_reachable_object(struct object *obj)
 156{
 157        /*
 158         * We obviously want the object to be parsed,
 159         * except if it was in a pack-file and we didn't
 160         * do a full fsck
 161         */
 162        if (!obj->parsed) {
 163                if (has_sha1_pack(obj->sha1))
 164                        return; /* it is in pack - forget about it */
 165                printf("missing %s %s\n", typename(obj->type), sha1_to_hex(obj->sha1));
 166                errors_found |= ERROR_REACHABLE;
 167                return;
 168        }
 169}
 170
 171/*
 172 * Check a single unreachable object
 173 */
 174static void check_unreachable_object(struct object *obj)
 175{
 176        /*
 177         * Missing unreachable object? Ignore it. It's not like
 178         * we miss it (since it can't be reached), nor do we want
 179         * to complain about it being unreachable (since it does
 180         * not exist).
 181         */
 182        if (!obj->parsed)
 183                return;
 184
 185        /*
 186         * Unreachable object that exists? Show it if asked to,
 187         * since this is something that is prunable.
 188         */
 189        if (show_unreachable) {
 190                printf("unreachable %s %s\n", typename(obj->type), sha1_to_hex(obj->sha1));
 191                return;
 192        }
 193
 194        /*
 195         * "!used" means that nothing at all points to it, including
 196         * other unreachable objects. In other words, it's the "tip"
 197         * of some set of unreachable objects, usually a commit that
 198         * got dropped.
 199         *
 200         * Such starting points are more interesting than some random
 201         * set of unreachable objects, so we show them even if the user
 202         * hasn't asked for _all_ unreachable objects. If you have
 203         * deleted a branch by mistake, this is a prime candidate to
 204         * start looking at, for example.
 205         */
 206        if (!obj->used) {
 207                printf("dangling %s %s\n", typename(obj->type),
 208                       sha1_to_hex(obj->sha1));
 209                if (write_lost_and_found) {
 210                        char *filename = git_path("lost-found/%s/%s",
 211                                obj->type == OBJ_COMMIT ? "commit" : "other",
 212                                sha1_to_hex(obj->sha1));
 213                        FILE *f;
 214
 215                        if (safe_create_leading_directories(filename)) {
 216                                error("Could not create lost-found");
 217                                return;
 218                        }
 219                        if (!(f = fopen(filename, "w")))
 220                                die("Could not open %s", filename);
 221                        if (obj->type == OBJ_BLOB) {
 222                                enum object_type type;
 223                                unsigned long size;
 224                                char *buf = read_sha1_file(obj->sha1,
 225                                                &type, &size);
 226                                if (buf) {
 227                                        if (fwrite(buf, size, 1, f) != 1)
 228                                                die("Could not write %s: %s",
 229                                                    filename, strerror(errno));
 230                                        free(buf);
 231                                }
 232                        } else
 233                                fprintf(f, "%s\n", sha1_to_hex(obj->sha1));
 234                        if (fclose(f))
 235                                die("Could not finish %s: %s",
 236                                    filename, strerror(errno));
 237                }
 238                return;
 239        }
 240
 241        /*
 242         * Otherwise? It's there, it's unreachable, and some other unreachable
 243         * object points to it. Ignore it - it's not interesting, and we showed
 244         * all the interesting cases above.
 245         */
 246}
 247
 248static void check_object(struct object *obj)
 249{
 250        if (verbose)
 251                fprintf(stderr, "Checking %s\n", sha1_to_hex(obj->sha1));
 252
 253        if (obj->flags & REACHABLE)
 254                check_reachable_object(obj);
 255        else
 256                check_unreachable_object(obj);
 257}
 258
 259static void check_connectivity(void)
 260{
 261        int i, max;
 262
 263        /* Traverse the pending reachable objects */
 264        traverse_reachable();
 265
 266        /* Look up all the requirements, warn about missing objects.. */
 267        max = get_max_object_index();
 268        if (verbose)
 269                fprintf(stderr, "Checking connectivity (%d objects)\n", max);
 270
 271        for (i = 0; i < max; i++) {
 272                struct object *obj = get_indexed_object(i);
 273
 274                if (obj)
 275                        check_object(obj);
 276        }
 277}
 278
 279static int fsck_sha1(const unsigned char *sha1)
 280{
 281        struct object *obj = parse_object(sha1);
 282        if (!obj) {
 283                errors_found |= ERROR_OBJECT;
 284                return error("%s: object corrupt or missing",
 285                             sha1_to_hex(sha1));
 286        }
 287        if (obj->flags & SEEN)
 288                return 0;
 289        obj->flags |= SEEN;
 290
 291        if (verbose)
 292                fprintf(stderr, "Checking %s %s\n",
 293                        typename(obj->type), sha1_to_hex(obj->sha1));
 294
 295        if (fsck_walk(obj, mark_used, 0))
 296                objerror(obj, "broken links");
 297        if (fsck_object(obj, check_strict, fsck_error_func))
 298                return -1;
 299
 300        if (obj->type == OBJ_TREE) {
 301                struct tree *item = (struct tree *) obj;
 302
 303                free(item->buffer);
 304                item->buffer = NULL;
 305        }
 306
 307        if (obj->type == OBJ_COMMIT) {
 308                struct commit *commit = (struct commit *) obj;
 309
 310                free(commit->buffer);
 311                commit->buffer = NULL;
 312
 313                if (!commit->parents && show_root)
 314                        printf("root %s\n", sha1_to_hex(commit->object.sha1));
 315        }
 316
 317        if (obj->type == OBJ_TAG) {
 318                struct tag *tag = (struct tag *) obj;
 319
 320                if (show_tags && tag->tagged) {
 321                        printf("tagged %s %s", typename(tag->tagged->type), sha1_to_hex(tag->tagged->sha1));
 322                        printf(" (%s) in %s\n", tag->tag, sha1_to_hex(tag->object.sha1));
 323                }
 324        }
 325
 326        return 0;
 327}
 328
 329/*
 330 * This is the sorting chunk size: make it reasonably
 331 * big so that we can sort well..
 332 */
 333#define MAX_SHA1_ENTRIES (1024)
 334
 335struct sha1_entry {
 336        unsigned long ino;
 337        unsigned char sha1[20];
 338};
 339
 340static struct {
 341        unsigned long nr;
 342        struct sha1_entry *entry[MAX_SHA1_ENTRIES];
 343} sha1_list;
 344
 345static int ino_compare(const void *_a, const void *_b)
 346{
 347        const struct sha1_entry *a = _a, *b = _b;
 348        unsigned long ino1 = a->ino, ino2 = b->ino;
 349        return ino1 < ino2 ? -1 : ino1 > ino2 ? 1 : 0;
 350}
 351
 352static void fsck_sha1_list(void)
 353{
 354        int i, nr = sha1_list.nr;
 355
 356        if (SORT_DIRENT)
 357                qsort(sha1_list.entry, nr,
 358                      sizeof(struct sha1_entry *), ino_compare);
 359        for (i = 0; i < nr; i++) {
 360                struct sha1_entry *entry = sha1_list.entry[i];
 361                unsigned char *sha1 = entry->sha1;
 362
 363                sha1_list.entry[i] = NULL;
 364                fsck_sha1(sha1);
 365                free(entry);
 366        }
 367        sha1_list.nr = 0;
 368}
 369
 370static void add_sha1_list(unsigned char *sha1, unsigned long ino)
 371{
 372        struct sha1_entry *entry = xmalloc(sizeof(*entry));
 373        int nr;
 374
 375        entry->ino = ino;
 376        hashcpy(entry->sha1, sha1);
 377        nr = sha1_list.nr;
 378        if (nr == MAX_SHA1_ENTRIES) {
 379                fsck_sha1_list();
 380                nr = 0;
 381        }
 382        sha1_list.entry[nr] = entry;
 383        sha1_list.nr = ++nr;
 384}
 385
 386static void fsck_dir(int i, char *path)
 387{
 388        DIR *dir = opendir(path);
 389        struct dirent *de;
 390
 391        if (!dir)
 392                return;
 393
 394        if (verbose)
 395                fprintf(stderr, "Checking directory %s\n", path);
 396
 397        while ((de = readdir(dir)) != NULL) {
 398                char name[100];
 399                unsigned char sha1[20];
 400
 401                if (is_dot_or_dotdot(de->d_name))
 402                        continue;
 403                if (strlen(de->d_name) == 38) {
 404                        sprintf(name, "%02x", i);
 405                        memcpy(name+2, de->d_name, 39);
 406                        if (get_sha1_hex(name, sha1) < 0)
 407                                break;
 408                        add_sha1_list(sha1, DIRENT_SORT_HINT(de));
 409                        continue;
 410                }
 411                if (!prefixcmp(de->d_name, "tmp_obj_"))
 412                        continue;
 413                fprintf(stderr, "bad sha1 file: %s/%s\n", path, de->d_name);
 414        }
 415        closedir(dir);
 416}
 417
 418static int default_refs;
 419
 420static int fsck_handle_reflog_ent(unsigned char *osha1, unsigned char *nsha1,
 421                const char *email, unsigned long timestamp, int tz,
 422                const char *message, void *cb_data)
 423{
 424        struct object *obj;
 425
 426        if (verbose)
 427                fprintf(stderr, "Checking reflog %s->%s\n",
 428                        sha1_to_hex(osha1), sha1_to_hex(nsha1));
 429
 430        if (!is_null_sha1(osha1)) {
 431                obj = lookup_object(osha1);
 432                if (obj) {
 433                        obj->used = 1;
 434                        mark_object_reachable(obj);
 435                }
 436        }
 437        obj = lookup_object(nsha1);
 438        if (obj) {
 439                obj->used = 1;
 440                mark_object_reachable(obj);
 441        }
 442        return 0;
 443}
 444
 445static int fsck_handle_reflog(const char *logname, const unsigned char *sha1, int flag, void *cb_data)
 446{
 447        for_each_reflog_ent(logname, fsck_handle_reflog_ent, NULL);
 448        return 0;
 449}
 450
 451static int is_branch(const char *refname)
 452{
 453        return !strcmp(refname, "HEAD") || !prefixcmp(refname, "refs/heads/");
 454}
 455
 456static int fsck_handle_ref(const char *refname, const unsigned char *sha1, int flag, void *cb_data)
 457{
 458        struct object *obj;
 459
 460        obj = parse_object(sha1);
 461        if (!obj) {
 462                error("%s: invalid sha1 pointer %s", refname, sha1_to_hex(sha1));
 463                /* We'll continue with the rest despite the error.. */
 464                return 0;
 465        }
 466        if (obj->type != OBJ_COMMIT && is_branch(refname))
 467                error("%s: not a commit", refname);
 468        default_refs++;
 469        obj->used = 1;
 470        mark_object_reachable(obj);
 471
 472        return 0;
 473}
 474
 475static void get_default_heads(void)
 476{
 477        if (head_points_at && !is_null_sha1(head_sha1))
 478                fsck_handle_ref("HEAD", head_sha1, 0, NULL);
 479        for_each_ref(fsck_handle_ref, NULL);
 480        if (include_reflogs)
 481                for_each_reflog(fsck_handle_reflog, NULL);
 482
 483        /*
 484         * Not having any default heads isn't really fatal, but
 485         * it does mean that "--unreachable" no longer makes any
 486         * sense (since in this case everything will obviously
 487         * be unreachable by definition.
 488         *
 489         * Showing dangling objects is valid, though (as those
 490         * dangling objects are likely lost heads).
 491         *
 492         * So we just print a warning about it, and clear the
 493         * "show_unreachable" flag.
 494         */
 495        if (!default_refs) {
 496                fprintf(stderr, "notice: No default references\n");
 497                show_unreachable = 0;
 498        }
 499}
 500
 501static void fsck_object_dir(const char *path)
 502{
 503        int i;
 504
 505        if (verbose)
 506                fprintf(stderr, "Checking object directory\n");
 507
 508        for (i = 0; i < 256; i++) {
 509                static char dir[4096];
 510                sprintf(dir, "%s/%02x", path, i);
 511                fsck_dir(i, dir);
 512        }
 513        fsck_sha1_list();
 514}
 515
 516static int fsck_head_link(void)
 517{
 518        int flag;
 519        int null_is_error = 0;
 520
 521        if (verbose)
 522                fprintf(stderr, "Checking HEAD link\n");
 523
 524        head_points_at = resolve_ref("HEAD", head_sha1, 0, &flag);
 525        if (!head_points_at)
 526                return error("Invalid HEAD");
 527        if (!strcmp(head_points_at, "HEAD"))
 528                /* detached HEAD */
 529                null_is_error = 1;
 530        else if (prefixcmp(head_points_at, "refs/heads/"))
 531                return error("HEAD points to something strange (%s)",
 532                             head_points_at);
 533        if (is_null_sha1(head_sha1)) {
 534                if (null_is_error)
 535                        return error("HEAD: detached HEAD points at nothing");
 536                fprintf(stderr, "notice: HEAD points to an unborn branch (%s)\n",
 537                        head_points_at + 11);
 538        }
 539        return 0;
 540}
 541
 542static int fsck_cache_tree(struct cache_tree *it)
 543{
 544        int i;
 545        int err = 0;
 546
 547        if (verbose)
 548                fprintf(stderr, "Checking cache tree\n");
 549
 550        if (0 <= it->entry_count) {
 551                struct object *obj = parse_object(it->sha1);
 552                if (!obj) {
 553                        error("%s: invalid sha1 pointer in cache-tree",
 554                              sha1_to_hex(it->sha1));
 555                        return 1;
 556                }
 557                mark_object_reachable(obj);
 558                obj->used = 1;
 559                if (obj->type != OBJ_TREE)
 560                        err |= objerror(obj, "non-tree in cache-tree");
 561        }
 562        for (i = 0; i < it->subtree_nr; i++)
 563                err |= fsck_cache_tree(it->down[i]->cache_tree);
 564        return err;
 565}
 566
 567static char const * const fsck_usage[] = {
 568        "git fsck [options] [<object>...]",
 569        NULL
 570};
 571
 572static struct option fsck_opts[] = {
 573        OPT__VERBOSE(&verbose),
 574        OPT_BOOLEAN(0, "unreachable", &show_unreachable, "show unreachable objects"),
 575        OPT_BOOLEAN(0, "tags", &show_tags, "report tags"),
 576        OPT_BOOLEAN(0, "root", &show_root, "report root nodes"),
 577        OPT_BOOLEAN(0, "cache", &keep_cache_objects, "make index objects head nodes"),
 578        OPT_BOOLEAN(0, "reflogs", &include_reflogs, "make reflogs head nodes (default)"),
 579        OPT_BOOLEAN(0, "full", &check_full, "also consider alternate objects"),
 580        OPT_BOOLEAN(0, "strict", &check_strict, "enable more strict checking"),
 581        OPT_BOOLEAN(0, "lost-found", &write_lost_and_found,
 582                                "write dangling objects in .git/lost-found"),
 583        OPT_END(),
 584};
 585
 586int cmd_fsck(int argc, const char **argv, const char *prefix)
 587{
 588        int i, heads;
 589        struct alternate_object_database *alt;
 590
 591        errors_found = 0;
 592
 593        argc = parse_options(argc, argv, fsck_opts, fsck_usage, 0);
 594        if (write_lost_and_found) {
 595                check_full = 1;
 596                include_reflogs = 0;
 597        }
 598
 599        fsck_head_link();
 600        fsck_object_dir(get_object_directory());
 601
 602        prepare_alt_odb();
 603        for (alt = alt_odb_list; alt; alt = alt->next) {
 604                char namebuf[PATH_MAX];
 605                int namelen = alt->name - alt->base;
 606                memcpy(namebuf, alt->base, namelen);
 607                namebuf[namelen - 1] = 0;
 608                fsck_object_dir(namebuf);
 609        }
 610
 611        if (check_full) {
 612                struct packed_git *p;
 613
 614                prepare_packed_git();
 615                for (p = packed_git; p; p = p->next)
 616                        /* verify gives error messages itself */
 617                        verify_pack(p);
 618
 619                for (p = packed_git; p; p = p->next) {
 620                        uint32_t j, num;
 621                        if (open_pack_index(p))
 622                                continue;
 623                        num = p->num_objects;
 624                        for (j = 0; j < num; j++)
 625                                fsck_sha1(nth_packed_object_sha1(p, j));
 626                }
 627        }
 628
 629        heads = 0;
 630        for (i = 0; i < argc; i++) {
 631                const char *arg = argv[i];
 632                unsigned char sha1[20];
 633                if (!get_sha1(arg, sha1)) {
 634                        struct object *obj = lookup_object(sha1);
 635
 636                        /* Error is printed by lookup_object(). */
 637                        if (!obj)
 638                                continue;
 639
 640                        obj->used = 1;
 641                        mark_object_reachable(obj);
 642                        heads++;
 643                        continue;
 644                }
 645                error("invalid parameter: expected sha1, got '%s'", arg);
 646        }
 647
 648        /*
 649         * If we've not been given any explicit head information, do the
 650         * default ones from .git/refs. We also consider the index file
 651         * in this case (ie this implies --cache).
 652         */
 653        if (!heads) {
 654                get_default_heads();
 655                keep_cache_objects = 1;
 656        }
 657
 658        if (keep_cache_objects) {
 659                read_cache();
 660                for (i = 0; i < active_nr; i++) {
 661                        unsigned int mode;
 662                        struct blob *blob;
 663                        struct object *obj;
 664
 665                        mode = active_cache[i]->ce_mode;
 666                        if (S_ISGITLINK(mode))
 667                                continue;
 668                        blob = lookup_blob(active_cache[i]->sha1);
 669                        if (!blob)
 670                                continue;
 671                        obj = &blob->object;
 672                        obj->used = 1;
 673                        mark_object_reachable(obj);
 674                }
 675                if (active_cache_tree)
 676                        fsck_cache_tree(active_cache_tree);
 677        }
 678
 679        check_connectivity();
 680        return errors_found;
 681}