fsck-objects.c on commit Merge branch 'jc/diff-test' into th/diff (26183e2)
   1#include <sys/types.h>
   2#include <dirent.h>
   3
   4#include "cache.h"
   5#include "commit.h"
   6#include "tree.h"
   7#include "blob.h"
   8#include "tag.h"
   9#include "refs.h"
  10#include "pack.h"
  11#include "cache-tree.h"
  12#include "tree-walk.h"
  13
  14#define REACHABLE 0x0001
  15#define SEEN      0x0002
  16
  17static int show_root = 0;
  18static int show_tags = 0;
  19static int show_unreachable = 0;
  20static int check_full = 0;
  21static int check_strict = 0;
  22static int keep_cache_objects = 0;
  23static unsigned char head_sha1[20];
  24
  25#ifdef NO_D_INO_IN_DIRENT
  26#define SORT_DIRENT 0
  27#define DIRENT_SORT_HINT(de) 0
  28#else
  29#define SORT_DIRENT 1
  30#define DIRENT_SORT_HINT(de) ((de)->d_ino)
  31#endif
  32
  33static void objreport(struct object *obj, const char *severity,
  34                      const char *err, va_list params)
  35{
  36        fprintf(stderr, "%s in %s %s: ",
  37                severity, typename(obj->type), sha1_to_hex(obj->sha1));
  38        vfprintf(stderr, err, params);
  39        fputs("\n", stderr);
  40}
  41
  42static int objerror(struct object *obj, const char *err, ...)
  43{
  44        va_list params;
  45        va_start(params, err);
  46        objreport(obj, "error", err, params);
  47        va_end(params);
  48        return -1;
  49}
  50
  51static int objwarning(struct object *obj, const char *err, ...)
  52{
  53        va_list params;
  54        va_start(params, err);
  55        objreport(obj, "warning", err, params);
  56        va_end(params);
  57        return -1;
  58}
  59
  60
  61static void check_connectivity(void)
  62{
  63        int i;
  64
  65        /* Look up all the requirements, warn about missing objects.. */
  66        for (i = 0; i < obj_allocs; i++) {
  67                const struct object_refs *refs;
  68                struct object *obj = objs[i];
  69
  70                if (!obj)
  71                        continue;
  72
  73                if (!obj->parsed) {
  74                        if (has_sha1_file(obj->sha1))
  75                                ; /* it is in pack */
  76                        else
  77                                printf("missing %s %s\n",
  78                                       typename(obj->type), sha1_to_hex(obj->sha1));
  79                        continue;
  80                }
  81
  82                refs = lookup_object_refs(obj);
  83                if (refs) {
  84                        unsigned j;
  85                        for (j = 0; j < refs->count; j++) {
  86                                struct object *ref = refs->ref[j];
  87                                if (ref->parsed ||
  88                                    (has_sha1_file(ref->sha1)))
  89                                        continue;
  90                                printf("broken link from %7s %s\n",
  91                                       typename(obj->type), sha1_to_hex(obj->sha1));
  92                                printf("              to %7s %s\n",
  93                                       typename(ref->type), sha1_to_hex(ref->sha1));
  94                        }
  95                }
  96
  97                if (show_unreachable && !(obj->flags & REACHABLE)) {
  98                        printf("unreachable %s %s\n",
  99                               typename(obj->type), sha1_to_hex(obj->sha1));
 100                        continue;
 101                }
 102
 103                if (!obj->used) {
 104                        printf("dangling %s %s\n", typename(obj->type),
 105                               sha1_to_hex(obj->sha1));
 106                }
 107        }
 108}
 109
 110/*
 111 * The entries in a tree are ordered in the _path_ order,
 112 * which means that a directory entry is ordered by adding
 113 * a slash to the end of it.
 114 *
 115 * So a directory called "a" is ordered _after_ a file
 116 * called "a.c", because "a/" sorts after "a.c".
 117 */
 118#define TREE_UNORDERED (-1)
 119#define TREE_HAS_DUPS  (-2)
 120
 121static int verify_ordered(unsigned mode1, const char *name1, unsigned mode2, const char *name2)
 122{
 123        int len1 = strlen(name1);
 124        int len2 = strlen(name2);
 125        int len = len1 < len2 ? len1 : len2;
 126        unsigned char c1, c2;
 127        int cmp;
 128
 129        cmp = memcmp(name1, name2, len);
 130        if (cmp < 0)
 131                return 0;
 132        if (cmp > 0)
 133                return TREE_UNORDERED;
 134
 135        /*
 136         * Ok, the first <len> characters are the same.
 137         * Now we need to order the next one, but turn
 138         * a '\0' into a '/' for a directory entry.
 139         */
 140        c1 = name1[len];
 141        c2 = name2[len];
 142        if (!c1 && !c2)
 143                /*
 144                 * git-write-tree used to write out a nonsense tree that has
 145                 * entries with the same name, one blob and one tree.  Make
 146                 * sure we do not have duplicate entries.
 147                 */
 148                return TREE_HAS_DUPS;
 149        if (!c1 && S_ISDIR(mode1))
 150                c1 = '/';
 151        if (!c2 && S_ISDIR(mode2))
 152                c2 = '/';
 153        return c1 < c2 ? 0 : TREE_UNORDERED;
 154}
 155
 156static int fsck_tree(struct tree *item)
 157{
 158        int retval;
 159        int has_full_path = 0;
 160        int has_zero_pad = 0;
 161        int has_bad_modes = 0;
 162        int has_dup_entries = 0;
 163        int not_properly_sorted = 0;
 164        struct tree_desc desc;
 165        unsigned o_mode;
 166        const char *o_name;
 167        const unsigned char *o_sha1;
 168
 169        desc.buf = item->buffer;
 170        desc.size = item->size;
 171
 172        o_mode = 0;
 173        o_name = NULL;
 174        o_sha1 = NULL;
 175        while (desc.size) {
 176                unsigned mode;
 177                const char *name;
 178                const unsigned char *sha1;
 179
 180                sha1 = tree_entry_extract(&desc, &name, &mode);
 181
 182                if (strchr(name, '/'))
 183                        has_full_path = 1;
 184                has_zero_pad |= *(char *)desc.buf == '0';
 185                update_tree_entry(&desc);
 186
 187                switch (mode) {
 188                /*
 189                 * Standard modes..
 190                 */
 191                case S_IFREG | 0755:
 192                case S_IFREG | 0644:
 193                case S_IFLNK:
 194                case S_IFDIR:
 195                        break;
 196                /*
 197                 * This is nonstandard, but we had a few of these
 198                 * early on when we honored the full set of mode
 199                 * bits..
 200                 */
 201                case S_IFREG | 0664:
 202                        if (!check_strict)
 203                                break;
 204                default:
 205                        has_bad_modes = 1;
 206                }
 207
 208                if (o_name) {
 209                        switch (verify_ordered(o_mode, o_name, mode, name)) {
 210                        case TREE_UNORDERED:
 211                                not_properly_sorted = 1;
 212                                break;
 213                        case TREE_HAS_DUPS:
 214                                has_dup_entries = 1;
 215                                break;
 216                        default:
 217                                break;
 218                        }
 219                }
 220
 221                o_mode = mode;
 222                o_name = name;
 223                o_sha1 = sha1;
 224        }
 225        free(item->buffer);
 226        item->buffer = NULL;
 227
 228        retval = 0;
 229        if (has_full_path) {
 230                objwarning(&item->object, "contains full pathnames");
 231        }
 232        if (has_zero_pad) {
 233                objwarning(&item->object, "contains zero-padded file modes");
 234        }
 235        if (has_bad_modes) {
 236                objwarning(&item->object, "contains bad file modes");
 237        }
 238        if (has_dup_entries) {
 239                retval = objerror(&item->object, "contains duplicate file entries");
 240        }
 241        if (not_properly_sorted) {
 242                retval = objerror(&item->object, "not properly sorted");
 243        }
 244        return retval;
 245}
 246
 247static int fsck_commit(struct commit *commit)
 248{
 249        char *buffer = commit->buffer;
 250        unsigned char tree_sha1[20], sha1[20];
 251
 252        if (memcmp(buffer, "tree ", 5))
 253                return objerror(&commit->object, "invalid format - expected 'tree' line");
 254        if (get_sha1_hex(buffer+5, tree_sha1) || buffer[45] != '\n')
 255                return objerror(&commit->object, "invalid 'tree' line format - bad sha1");
 256        buffer += 46;
 257        while (!memcmp(buffer, "parent ", 7)) {
 258                if (get_sha1_hex(buffer+7, sha1) || buffer[47] != '\n')
 259                        return objerror(&commit->object, "invalid 'parent' line format - bad sha1");
 260                buffer += 48;
 261        }
 262        if (memcmp(buffer, "author ", 7))
 263                return objerror(&commit->object, "invalid format - expected 'author' line");
 264        free(commit->buffer);
 265        commit->buffer = NULL;
 266        if (!commit->tree)
 267                return objerror(&commit->object, "could not load commit's tree %s", tree_sha1);
 268        if (!commit->parents && show_root)
 269                printf("root %s\n", sha1_to_hex(commit->object.sha1));
 270        if (!commit->date)
 271                printf("bad commit date in %s\n", 
 272                       sha1_to_hex(commit->object.sha1));
 273        return 0;
 274}
 275
 276static int fsck_tag(struct tag *tag)
 277{
 278        struct object *tagged = tag->tagged;
 279
 280        if (!tagged) {
 281                return objerror(&tag->object, "could not load tagged object");
 282        }
 283        if (!show_tags)
 284                return 0;
 285
 286        printf("tagged %s %s", typename(tagged->type), sha1_to_hex(tagged->sha1));
 287        printf(" (%s) in %s\n", tag->tag, sha1_to_hex(tag->object.sha1));
 288        return 0;
 289}
 290
 291static int fsck_sha1(unsigned char *sha1)
 292{
 293        struct object *obj = parse_object(sha1);
 294        if (!obj)
 295                return error("%s: object not found", sha1_to_hex(sha1));
 296        if (obj->flags & SEEN)
 297                return 0;
 298        obj->flags |= SEEN;
 299        if (obj->type == TYPE_BLOB)
 300                return 0;
 301        if (obj->type == TYPE_TREE)
 302                return fsck_tree((struct tree *) obj);
 303        if (obj->type == TYPE_COMMIT)
 304                return fsck_commit((struct commit *) obj);
 305        if (obj->type == TYPE_TAG)
 306                return fsck_tag((struct tag *) obj);
 307        /* By now, parse_object() would've returned NULL instead. */
 308        return objerror(obj, "unknown type '%d' (internal fsck error)", obj->type);
 309}
 310
 311/*
 312 * This is the sorting chunk size: make it reasonably
 313 * big so that we can sort well..
 314 */
 315#define MAX_SHA1_ENTRIES (1024)
 316
 317struct sha1_entry {
 318        unsigned long ino;
 319        unsigned char sha1[20];
 320};
 321
 322static struct {
 323        unsigned long nr;
 324        struct sha1_entry *entry[MAX_SHA1_ENTRIES];
 325} sha1_list;
 326
 327static int ino_compare(const void *_a, const void *_b)
 328{
 329        const struct sha1_entry *a = _a, *b = _b;
 330        unsigned long ino1 = a->ino, ino2 = b->ino;
 331        return ino1 < ino2 ? -1 : ino1 > ino2 ? 1 : 0;
 332}
 333
 334static void fsck_sha1_list(void)
 335{
 336        int i, nr = sha1_list.nr;
 337
 338        if (SORT_DIRENT)
 339                qsort(sha1_list.entry, nr,
 340                      sizeof(struct sha1_entry *), ino_compare);
 341        for (i = 0; i < nr; i++) {
 342                struct sha1_entry *entry = sha1_list.entry[i];
 343                unsigned char *sha1 = entry->sha1;
 344
 345                sha1_list.entry[i] = NULL;
 346                fsck_sha1(sha1);
 347                free(entry);
 348        }
 349        sha1_list.nr = 0;
 350}
 351
 352static void add_sha1_list(unsigned char *sha1, unsigned long ino)
 353{
 354        struct sha1_entry *entry = xmalloc(sizeof(*entry));
 355        int nr;
 356
 357        entry->ino = ino;
 358        memcpy(entry->sha1, sha1, 20);
 359        nr = sha1_list.nr;
 360        if (nr == MAX_SHA1_ENTRIES) {
 361                fsck_sha1_list();
 362                nr = 0;
 363        }
 364        sha1_list.entry[nr] = entry;
 365        sha1_list.nr = ++nr;
 366}
 367
 368static int fsck_dir(int i, char *path)
 369{
 370        DIR *dir = opendir(path);
 371        struct dirent *de;
 372
 373        if (!dir)
 374                return 0;
 375
 376        while ((de = readdir(dir)) != NULL) {
 377                char name[100];
 378                unsigned char sha1[20];
 379                int len = strlen(de->d_name);
 380
 381                switch (len) {
 382                case 2:
 383                        if (de->d_name[1] != '.')
 384                                break;
 385                case 1:
 386                        if (de->d_name[0] != '.')
 387                                break;
 388                        continue;
 389                case 38:
 390                        sprintf(name, "%02x", i);
 391                        memcpy(name+2, de->d_name, len+1);
 392                        if (get_sha1_hex(name, sha1) < 0)
 393                                break;
 394                        add_sha1_list(sha1, DIRENT_SORT_HINT(de));
 395                        continue;
 396                }
 397                fprintf(stderr, "bad sha1 file: %s/%s\n", path, de->d_name);
 398        }
 399        closedir(dir);
 400        return 0;
 401}
 402
 403static int default_refs = 0;
 404
 405static int fsck_handle_ref(const char *refname, const unsigned char *sha1)
 406{
 407        struct object *obj;
 408
 409        obj = lookup_object(sha1);
 410        if (!obj) {
 411                if (has_sha1_file(sha1)) {
 412                        default_refs++;
 413                        return 0; /* it is in a pack */
 414                }
 415                error("%s: invalid sha1 pointer %s", refname, sha1_to_hex(sha1));
 416                /* We'll continue with the rest despite the error.. */
 417                return 0;
 418        }
 419        default_refs++;
 420        obj->used = 1;
 421        mark_reachable(obj, REACHABLE);
 422        return 0;
 423}
 424
 425static void get_default_heads(void)
 426{
 427        for_each_ref(fsck_handle_ref);
 428        if (!default_refs)
 429                die("No default references");
 430}
 431
 432static void fsck_object_dir(const char *path)
 433{
 434        int i;
 435        for (i = 0; i < 256; i++) {
 436                static char dir[4096];
 437                sprintf(dir, "%s/%02x", path, i);
 438                fsck_dir(i, dir);
 439        }
 440        fsck_sha1_list();
 441}
 442
 443static int fsck_head_link(void)
 444{
 445        unsigned char sha1[20];
 446        const char *git_HEAD = strdup(git_path("HEAD"));
 447        const char *git_refs_heads_master = resolve_ref(git_HEAD, sha1, 1);
 448        int pfxlen = strlen(git_HEAD) - 4; /* strip .../.git/ part */
 449
 450        if (!git_refs_heads_master)
 451                return error("HEAD is not a symbolic ref");
 452        if (strncmp(git_refs_heads_master + pfxlen, "refs/heads/", 11))
 453                return error("HEAD points to something strange (%s)",
 454                             git_refs_heads_master + pfxlen);
 455        if (!memcmp(null_sha1, sha1, 20))
 456                return error("HEAD: not a valid git pointer");
 457        return 0;
 458}
 459
 460static int fsck_cache_tree(struct cache_tree *it)
 461{
 462        int i;
 463        int err = 0;
 464
 465        if (0 <= it->entry_count) {
 466                struct object *obj = parse_object(it->sha1);
 467                if (!obj) {
 468                        error("%s: invalid sha1 pointer in cache-tree",
 469                              sha1_to_hex(it->sha1));
 470                        return 1;
 471                }
 472                mark_reachable(obj, REACHABLE);
 473                obj->used = 1;
 474                if (obj->type != TYPE_TREE)
 475                        err |= objerror(obj, "non-tree in cache-tree");
 476        }
 477        for (i = 0; i < it->subtree_nr; i++)
 478                err |= fsck_cache_tree(it->down[i]->cache_tree);
 479        return err;
 480}
 481
 482int main(int argc, char **argv)
 483{
 484        int i, heads;
 485
 486        track_object_refs = 1;
 487        setup_git_directory();
 488
 489        for (i = 1; i < argc; i++) {
 490                const char *arg = argv[i];
 491
 492                if (!strcmp(arg, "--unreachable")) {
 493                        show_unreachable = 1;
 494                        continue;
 495                }
 496                if (!strcmp(arg, "--tags")) {
 497                        show_tags = 1;
 498                        continue;
 499                }
 500                if (!strcmp(arg, "--root")) {
 501                        show_root = 1;
 502                        continue;
 503                }
 504                if (!strcmp(arg, "--cache")) {
 505                        keep_cache_objects = 1;
 506                        continue;
 507                }
 508                if (!strcmp(arg, "--full")) {
 509                        check_full = 1;
 510                        continue;
 511                }
 512                if (!strcmp(arg, "--strict")) {
 513                        check_strict = 1;
 514                        continue;
 515                }
 516                if (*arg == '-')
 517                        usage("git-fsck-objects [--tags] [--root] [[--unreachable] [--cache] [--full] [--strict] <head-sha1>*]");
 518        }
 519
 520        fsck_head_link();
 521        fsck_object_dir(get_object_directory());
 522        if (check_full) {
 523                struct alternate_object_database *alt;
 524                struct packed_git *p;
 525                prepare_alt_odb();
 526                for (alt = alt_odb_list; alt; alt = alt->next) {
 527                        char namebuf[PATH_MAX];
 528                        int namelen = alt->name - alt->base;
 529                        memcpy(namebuf, alt->base, namelen);
 530                        namebuf[namelen - 1] = 0;
 531                        fsck_object_dir(namebuf);
 532                }
 533                prepare_packed_git();
 534                for (p = packed_git; p; p = p->next)
 535                        /* verify gives error messages itself */
 536                        verify_pack(p, 0);
 537
 538                for (p = packed_git; p; p = p->next) {
 539                        int num = num_packed_objects(p);
 540                        for (i = 0; i < num; i++) {
 541                                unsigned char sha1[20];
 542                                nth_packed_object_sha1(p, i, sha1);
 543                                fsck_sha1(sha1);
 544                        }
 545                }
 546        }
 547
 548        heads = 0;
 549        for (i = 1; i < argc; i++) {
 550                const char *arg = argv[i]; 
 551
 552                if (*arg == '-')
 553                        continue;
 554
 555                if (!get_sha1(arg, head_sha1)) {
 556                        struct object *obj = lookup_object(head_sha1);
 557
 558                        /* Error is printed by lookup_object(). */
 559                        if (!obj)
 560                                continue;
 561
 562                        obj->used = 1;
 563                        mark_reachable(obj, REACHABLE);
 564                        heads++;
 565                        continue;
 566                }
 567                error("invalid parameter: expected sha1, got '%s'", arg);
 568        }
 569
 570        /*
 571         * If we've not been given any explicit head information, do the
 572         * default ones from .git/refs. We also consider the index file
 573         * in this case (ie this implies --cache).
 574         */
 575        if (!heads) {
 576                get_default_heads();
 577                keep_cache_objects = 1;
 578        }
 579
 580        if (keep_cache_objects) {
 581                int i;
 582                read_cache();
 583                for (i = 0; i < active_nr; i++) {
 584                        struct blob *blob = lookup_blob(active_cache[i]->sha1);
 585                        struct object *obj;
 586                        if (!blob)
 587                                continue;
 588                        obj = &blob->object;
 589                        obj->used = 1;
 590                        mark_reachable(obj, REACHABLE);
 591                }
 592                if (active_cache_tree)
 593                        fsck_cache_tree(active_cache_tree);
 594        }
 595
 596        check_connectivity();
 597        return 0;
 598}