/* fsck-objects.c on commit "Merge branch 'sp/merge' (early part)" (6de3347) */
   1#include "cache.h"
   2#include "commit.h"
   3#include "tree.h"
   4#include "blob.h"
   5#include "tag.h"
   6#include "refs.h"
   7#include "pack.h"
   8#include "cache-tree.h"
   9#include "tree-walk.h"
  10
  11#define REACHABLE 0x0001
  12#define SEEN      0x0002
  13
  14static int show_root;
  15static int show_tags;
  16static int show_unreachable;
  17static int check_full;
  18static int check_strict;
  19static int keep_cache_objects;
  20static unsigned char head_sha1[20];
  21
  22#ifdef NO_D_INO_IN_DIRENT
  23#define SORT_DIRENT 0
  24#define DIRENT_SORT_HINT(de) 0
  25#else
  26#define SORT_DIRENT 1
  27#define DIRENT_SORT_HINT(de) ((de)->d_ino)
  28#endif
  29
  30static void objreport(struct object *obj, const char *severity,
  31                      const char *err, va_list params)
  32{
  33        fprintf(stderr, "%s in %s %s: ",
  34                severity, typename(obj->type), sha1_to_hex(obj->sha1));
  35        vfprintf(stderr, err, params);
  36        fputs("\n", stderr);
  37}
  38
  39static int objerror(struct object *obj, const char *err, ...)
  40{
  41        va_list params;
  42        va_start(params, err);
  43        objreport(obj, "error", err, params);
  44        va_end(params);
  45        return -1;
  46}
  47
  48static int objwarning(struct object *obj, const char *err, ...)
  49{
  50        va_list params;
  51        va_start(params, err);
  52        objreport(obj, "warning", err, params);
  53        va_end(params);
  54        return -1;
  55}
  56
  57
  58static void check_connectivity(void)
  59{
  60        int i, max;
  61
  62        /* Look up all the requirements, warn about missing objects.. */
  63        max = get_max_object_index();
  64        for (i = 0; i < max; i++) {
  65                const struct object_refs *refs;
  66                struct object *obj = get_indexed_object(i);
  67
  68                if (!obj)
  69                        continue;
  70
  71                if (!obj->parsed) {
  72                        if (has_sha1_file(obj->sha1))
  73                                ; /* it is in pack */
  74                        else
  75                                printf("missing %s %s\n",
  76                                       typename(obj->type), sha1_to_hex(obj->sha1));
  77                        continue;
  78                }
  79
  80                refs = lookup_object_refs(obj);
  81                if (refs) {
  82                        unsigned j;
  83                        for (j = 0; j < refs->count; j++) {
  84                                struct object *ref = refs->ref[j];
  85                                if (ref->parsed ||
  86                                    (has_sha1_file(ref->sha1)))
  87                                        continue;
  88                                printf("broken link from %7s %s\n",
  89                                       typename(obj->type), sha1_to_hex(obj->sha1));
  90                                printf("              to %7s %s\n",
  91                                       typename(ref->type), sha1_to_hex(ref->sha1));
  92                        }
  93                }
  94
  95                if (show_unreachable && !(obj->flags & REACHABLE)) {
  96                        printf("unreachable %s %s\n",
  97                               typename(obj->type), sha1_to_hex(obj->sha1));
  98                        continue;
  99                }
 100
 101                if (!obj->used) {
 102                        printf("dangling %s %s\n", typename(obj->type),
 103                               sha1_to_hex(obj->sha1));
 104                }
 105        }
 106}
 107
 108/*
 109 * The entries in a tree are ordered in the _path_ order,
 110 * which means that a directory entry is ordered by adding
 111 * a slash to the end of it.
 112 *
 113 * So a directory called "a" is ordered _after_ a file
 114 * called "a.c", because "a/" sorts after "a.c".
 115 */
 116#define TREE_UNORDERED (-1)
 117#define TREE_HAS_DUPS  (-2)
 118
 119static int verify_ordered(unsigned mode1, const char *name1, unsigned mode2, const char *name2)
 120{
 121        int len1 = strlen(name1);
 122        int len2 = strlen(name2);
 123        int len = len1 < len2 ? len1 : len2;
 124        unsigned char c1, c2;
 125        int cmp;
 126
 127        cmp = memcmp(name1, name2, len);
 128        if (cmp < 0)
 129                return 0;
 130        if (cmp > 0)
 131                return TREE_UNORDERED;
 132
 133        /*
 134         * Ok, the first <len> characters are the same.
 135         * Now we need to order the next one, but turn
 136         * a '\0' into a '/' for a directory entry.
 137         */
 138        c1 = name1[len];
 139        c2 = name2[len];
 140        if (!c1 && !c2)
 141                /*
 142                 * git-write-tree used to write out a nonsense tree that has
 143                 * entries with the same name, one blob and one tree.  Make
 144                 * sure we do not have duplicate entries.
 145                 */
 146                return TREE_HAS_DUPS;
 147        if (!c1 && S_ISDIR(mode1))
 148                c1 = '/';
 149        if (!c2 && S_ISDIR(mode2))
 150                c2 = '/';
 151        return c1 < c2 ? 0 : TREE_UNORDERED;
 152}
 153
 154static int fsck_tree(struct tree *item)
 155{
 156        int retval;
 157        int has_full_path = 0;
 158        int has_zero_pad = 0;
 159        int has_bad_modes = 0;
 160        int has_dup_entries = 0;
 161        int not_properly_sorted = 0;
 162        struct tree_desc desc;
 163        unsigned o_mode;
 164        const char *o_name;
 165        const unsigned char *o_sha1;
 166
 167        desc.buf = item->buffer;
 168        desc.size = item->size;
 169
 170        o_mode = 0;
 171        o_name = NULL;
 172        o_sha1 = NULL;
 173        while (desc.size) {
 174                unsigned mode;
 175                const char *name;
 176                const unsigned char *sha1;
 177
 178                sha1 = tree_entry_extract(&desc, &name, &mode);
 179
 180                if (strchr(name, '/'))
 181                        has_full_path = 1;
 182                has_zero_pad |= *(char *)desc.buf == '0';
 183                update_tree_entry(&desc);
 184
 185                switch (mode) {
 186                /*
 187                 * Standard modes..
 188                 */
 189                case S_IFREG | 0755:
 190                case S_IFREG | 0644:
 191                case S_IFLNK:
 192                case S_IFDIR:
 193                        break;
 194                /*
 195                 * This is nonstandard, but we had a few of these
 196                 * early on when we honored the full set of mode
 197                 * bits..
 198                 */
 199                case S_IFREG | 0664:
 200                        if (!check_strict)
 201                                break;
 202                default:
 203                        has_bad_modes = 1;
 204                }
 205
 206                if (o_name) {
 207                        switch (verify_ordered(o_mode, o_name, mode, name)) {
 208                        case TREE_UNORDERED:
 209                                not_properly_sorted = 1;
 210                                break;
 211                        case TREE_HAS_DUPS:
 212                                has_dup_entries = 1;
 213                                break;
 214                        default:
 215                                break;
 216                        }
 217                }
 218
 219                o_mode = mode;
 220                o_name = name;
 221                o_sha1 = sha1;
 222        }
 223        free(item->buffer);
 224        item->buffer = NULL;
 225
 226        retval = 0;
 227        if (has_full_path) {
 228                objwarning(&item->object, "contains full pathnames");
 229        }
 230        if (has_zero_pad) {
 231                objwarning(&item->object, "contains zero-padded file modes");
 232        }
 233        if (has_bad_modes) {
 234                objwarning(&item->object, "contains bad file modes");
 235        }
 236        if (has_dup_entries) {
 237                retval = objerror(&item->object, "contains duplicate file entries");
 238        }
 239        if (not_properly_sorted) {
 240                retval = objerror(&item->object, "not properly sorted");
 241        }
 242        return retval;
 243}
 244
 245static int fsck_commit(struct commit *commit)
 246{
 247        char *buffer = commit->buffer;
 248        unsigned char tree_sha1[20], sha1[20];
 249
 250        if (memcmp(buffer, "tree ", 5))
 251                return objerror(&commit->object, "invalid format - expected 'tree' line");
 252        if (get_sha1_hex(buffer+5, tree_sha1) || buffer[45] != '\n')
 253                return objerror(&commit->object, "invalid 'tree' line format - bad sha1");
 254        buffer += 46;
 255        while (!memcmp(buffer, "parent ", 7)) {
 256                if (get_sha1_hex(buffer+7, sha1) || buffer[47] != '\n')
 257                        return objerror(&commit->object, "invalid 'parent' line format - bad sha1");
 258                buffer += 48;
 259        }
 260        if (memcmp(buffer, "author ", 7))
 261                return objerror(&commit->object, "invalid format - expected 'author' line");
 262        free(commit->buffer);
 263        commit->buffer = NULL;
 264        if (!commit->tree)
 265                return objerror(&commit->object, "could not load commit's tree %s", tree_sha1);
 266        if (!commit->parents && show_root)
 267                printf("root %s\n", sha1_to_hex(commit->object.sha1));
 268        if (!commit->date)
 269                printf("bad commit date in %s\n", 
 270                       sha1_to_hex(commit->object.sha1));
 271        return 0;
 272}
 273
 274static int fsck_tag(struct tag *tag)
 275{
 276        struct object *tagged = tag->tagged;
 277
 278        if (!tagged) {
 279                return objerror(&tag->object, "could not load tagged object");
 280        }
 281        if (!show_tags)
 282                return 0;
 283
 284        printf("tagged %s %s", typename(tagged->type), sha1_to_hex(tagged->sha1));
 285        printf(" (%s) in %s\n", tag->tag, sha1_to_hex(tag->object.sha1));
 286        return 0;
 287}
 288
 289static int fsck_sha1(unsigned char *sha1)
 290{
 291        struct object *obj = parse_object(sha1);
 292        if (!obj)
 293                return error("%s: object corrupt or missing", sha1_to_hex(sha1));
 294        if (obj->flags & SEEN)
 295                return 0;
 296        obj->flags |= SEEN;
 297        if (obj->type == OBJ_BLOB)
 298                return 0;
 299        if (obj->type == OBJ_TREE)
 300                return fsck_tree((struct tree *) obj);
 301        if (obj->type == OBJ_COMMIT)
 302                return fsck_commit((struct commit *) obj);
 303        if (obj->type == OBJ_TAG)
 304                return fsck_tag((struct tag *) obj);
 305        /* By now, parse_object() would've returned NULL instead. */
 306        return objerror(obj, "unknown type '%d' (internal fsck error)", obj->type);
 307}
 308
 309/*
 310 * This is the sorting chunk size: make it reasonably
 311 * big so that we can sort well..
 312 */
 313#define MAX_SHA1_ENTRIES (1024)
 314
 315struct sha1_entry {
 316        unsigned long ino;
 317        unsigned char sha1[20];
 318};
 319
 320static struct {
 321        unsigned long nr;
 322        struct sha1_entry *entry[MAX_SHA1_ENTRIES];
 323} sha1_list;
 324
 325static int ino_compare(const void *_a, const void *_b)
 326{
 327        const struct sha1_entry *a = _a, *b = _b;
 328        unsigned long ino1 = a->ino, ino2 = b->ino;
 329        return ino1 < ino2 ? -1 : ino1 > ino2 ? 1 : 0;
 330}
 331
 332static void fsck_sha1_list(void)
 333{
 334        int i, nr = sha1_list.nr;
 335
 336        if (SORT_DIRENT)
 337                qsort(sha1_list.entry, nr,
 338                      sizeof(struct sha1_entry *), ino_compare);
 339        for (i = 0; i < nr; i++) {
 340                struct sha1_entry *entry = sha1_list.entry[i];
 341                unsigned char *sha1 = entry->sha1;
 342
 343                sha1_list.entry[i] = NULL;
 344                fsck_sha1(sha1);
 345                free(entry);
 346        }
 347        sha1_list.nr = 0;
 348}
 349
 350static void add_sha1_list(unsigned char *sha1, unsigned long ino)
 351{
 352        struct sha1_entry *entry = xmalloc(sizeof(*entry));
 353        int nr;
 354
 355        entry->ino = ino;
 356        hashcpy(entry->sha1, sha1);
 357        nr = sha1_list.nr;
 358        if (nr == MAX_SHA1_ENTRIES) {
 359                fsck_sha1_list();
 360                nr = 0;
 361        }
 362        sha1_list.entry[nr] = entry;
 363        sha1_list.nr = ++nr;
 364}
 365
 366static void fsck_dir(int i, char *path)
 367{
 368        DIR *dir = opendir(path);
 369        struct dirent *de;
 370
 371        if (!dir)
 372                return;
 373
 374        while ((de = readdir(dir)) != NULL) {
 375                char name[100];
 376                unsigned char sha1[20];
 377                int len = strlen(de->d_name);
 378
 379                switch (len) {
 380                case 2:
 381                        if (de->d_name[1] != '.')
 382                                break;
 383                case 1:
 384                        if (de->d_name[0] != '.')
 385                                break;
 386                        continue;
 387                case 38:
 388                        sprintf(name, "%02x", i);
 389                        memcpy(name+2, de->d_name, len+1);
 390                        if (get_sha1_hex(name, sha1) < 0)
 391                                break;
 392                        add_sha1_list(sha1, DIRENT_SORT_HINT(de));
 393                        continue;
 394                }
 395                fprintf(stderr, "bad sha1 file: %s/%s\n", path, de->d_name);
 396        }
 397        closedir(dir);
 398}
 399
 400static int default_refs;
 401
 402static int fsck_handle_reflog_ent(unsigned char *osha1, unsigned char *nsha1,
 403                const char *email, unsigned long timestamp, int tz,
 404                const char *message, void *cb_data)
 405{
 406        struct object *obj;
 407
 408        if (!is_null_sha1(osha1)) {
 409                obj = lookup_object(osha1);
 410                if (obj) {
 411                        obj->used = 1;
 412                        mark_reachable(obj, REACHABLE);
 413                }
 414        }
 415        obj = lookup_object(nsha1);
 416        if (obj) {
 417                obj->used = 1;
 418                mark_reachable(obj, REACHABLE);
 419        }
 420        return 0;
 421}
 422
 423static int fsck_handle_ref(const char *refname, const unsigned char *sha1, int flag, void *cb_data)
 424{
 425        struct object *obj;
 426
 427        obj = lookup_object(sha1);
 428        if (!obj) {
 429                if (has_sha1_file(sha1)) {
 430                        default_refs++;
 431                        return 0; /* it is in a pack */
 432                }
 433                error("%s: invalid sha1 pointer %s", refname, sha1_to_hex(sha1));
 434                /* We'll continue with the rest despite the error.. */
 435                return 0;
 436        }
 437        default_refs++;
 438        obj->used = 1;
 439        mark_reachable(obj, REACHABLE);
 440
 441        for_each_reflog_ent(refname, fsck_handle_reflog_ent, NULL);
 442
 443        return 0;
 444}
 445
 446static void get_default_heads(void)
 447{
 448        for_each_ref(fsck_handle_ref, NULL);
 449
 450        /*
 451         * Not having any default heads isn't really fatal, but
 452         * it does mean that "--unreachable" no longer makes any
 453         * sense (since in this case everything will obviously
 454         * be unreachable by definition.
 455         *
 456         * Showing dangling objects is valid, though (as those
 457         * dangling objects are likely lost heads).
 458         *
 459         * So we just print a warning about it, and clear the
 460         * "show_unreachable" flag.
 461         */
 462        if (!default_refs) {
 463                error("No default references");
 464                show_unreachable = 0;
 465        }
 466}
 467
 468static void fsck_object_dir(const char *path)
 469{
 470        int i;
 471        for (i = 0; i < 256; i++) {
 472                static char dir[4096];
 473                sprintf(dir, "%s/%02x", path, i);
 474                fsck_dir(i, dir);
 475        }
 476        fsck_sha1_list();
 477}
 478
 479static int fsck_head_link(void)
 480{
 481        unsigned char sha1[20];
 482        int flag;
 483        const char *head_points_at = resolve_ref("HEAD", sha1, 1, &flag);
 484
 485        if (!head_points_at || !(flag & REF_ISSYMREF))
 486                return error("HEAD is not a symbolic ref");
 487        if (strncmp(head_points_at, "refs/heads/", 11))
 488                return error("HEAD points to something strange (%s)",
 489                             head_points_at);
 490        if (is_null_sha1(sha1))
 491                return error("HEAD: not a valid git pointer");
 492        return 0;
 493}
 494
 495static int fsck_cache_tree(struct cache_tree *it)
 496{
 497        int i;
 498        int err = 0;
 499
 500        if (0 <= it->entry_count) {
 501                struct object *obj = parse_object(it->sha1);
 502                if (!obj) {
 503                        error("%s: invalid sha1 pointer in cache-tree",
 504                              sha1_to_hex(it->sha1));
 505                        return 1;
 506                }
 507                mark_reachable(obj, REACHABLE);
 508                obj->used = 1;
 509                if (obj->type != OBJ_TREE)
 510                        err |= objerror(obj, "non-tree in cache-tree");
 511        }
 512        for (i = 0; i < it->subtree_nr; i++)
 513                err |= fsck_cache_tree(it->down[i]->cache_tree);
 514        return err;
 515}
 516
 517int main(int argc, char **argv)
 518{
 519        int i, heads;
 520
 521        track_object_refs = 1;
 522        setup_git_directory();
 523
 524        for (i = 1; i < argc; i++) {
 525                const char *arg = argv[i];
 526
 527                if (!strcmp(arg, "--unreachable")) {
 528                        show_unreachable = 1;
 529                        continue;
 530                }
 531                if (!strcmp(arg, "--tags")) {
 532                        show_tags = 1;
 533                        continue;
 534                }
 535                if (!strcmp(arg, "--root")) {
 536                        show_root = 1;
 537                        continue;
 538                }
 539                if (!strcmp(arg, "--cache")) {
 540                        keep_cache_objects = 1;
 541                        continue;
 542                }
 543                if (!strcmp(arg, "--full")) {
 544                        check_full = 1;
 545                        continue;
 546                }
 547                if (!strcmp(arg, "--strict")) {
 548                        check_strict = 1;
 549                        continue;
 550                }
 551                if (*arg == '-')
 552                        usage("git-fsck-objects [--tags] [--root] [[--unreachable] [--cache] [--full] [--strict] <head-sha1>*]");
 553        }
 554
 555        fsck_head_link();
 556        fsck_object_dir(get_object_directory());
 557        if (check_full) {
 558                struct alternate_object_database *alt;
 559                struct packed_git *p;
 560                prepare_alt_odb();
 561                for (alt = alt_odb_list; alt; alt = alt->next) {
 562                        char namebuf[PATH_MAX];
 563                        int namelen = alt->name - alt->base;
 564                        memcpy(namebuf, alt->base, namelen);
 565                        namebuf[namelen - 1] = 0;
 566                        fsck_object_dir(namebuf);
 567                }
 568                prepare_packed_git();
 569                for (p = packed_git; p; p = p->next)
 570                        /* verify gives error messages itself */
 571                        verify_pack(p, 0);
 572
 573                for (p = packed_git; p; p = p->next) {
 574                        int num = num_packed_objects(p);
 575                        for (i = 0; i < num; i++) {
 576                                unsigned char sha1[20];
 577                                nth_packed_object_sha1(p, i, sha1);
 578                                fsck_sha1(sha1);
 579                        }
 580                }
 581        }
 582
 583        heads = 0;
 584        for (i = 1; i < argc; i++) {
 585                const char *arg = argv[i]; 
 586
 587                if (*arg == '-')
 588                        continue;
 589
 590                if (!get_sha1(arg, head_sha1)) {
 591                        struct object *obj = lookup_object(head_sha1);
 592
 593                        /* Error is printed by lookup_object(). */
 594                        if (!obj)
 595                                continue;
 596
 597                        obj->used = 1;
 598                        mark_reachable(obj, REACHABLE);
 599                        heads++;
 600                        continue;
 601                }
 602                error("invalid parameter: expected sha1, got '%s'", arg);
 603        }
 604
 605        /*
 606         * If we've not been given any explicit head information, do the
 607         * default ones from .git/refs. We also consider the index file
 608         * in this case (ie this implies --cache).
 609         */
 610        if (!heads) {
 611                get_default_heads();
 612                keep_cache_objects = 1;
 613        }
 614
 615        if (keep_cache_objects) {
 616                int i;
 617                read_cache();
 618                for (i = 0; i < active_nr; i++) {
 619                        struct blob *blob = lookup_blob(active_cache[i]->sha1);
 620                        struct object *obj;
 621                        if (!blob)
 622                                continue;
 623                        obj = &blob->object;
 624                        obj->used = 1;
 625                        mark_reachable(obj, REACHABLE);
 626                }
 627                if (active_cache_tree)
 628                        fsck_cache_tree(active_cache_tree);
 629        }
 630
 631        check_connectivity();
 632        return 0;
 633}