fsck.c on commit Merge branch 'rs/get-tagged-oid' (cf861cd)
   1#include "cache.h"
   2#include "object-store.h"
   3#include "repository.h"
   4#include "object.h"
   5#include "blob.h"
   6#include "tree.h"
   7#include "tree-walk.h"
   8#include "commit.h"
   9#include "tag.h"
  10#include "fsck.h"
  11#include "refs.h"
  12#include "utf8.h"
  13#include "decorate.h"
  14#include "oidset.h"
  15#include "packfile.h"
  16#include "submodule-config.h"
  17#include "config.h"
  18#include "help.h"
  19
  20static struct oidset gitmodules_found = OIDSET_INIT;
  21static struct oidset gitmodules_done = OIDSET_INIT;
  22
  23#define FSCK_FATAL -1
  24#define FSCK_INFO -2
  25
  26#define FOREACH_MSG_ID(FUNC) \
  27        /* fatal errors */ \
  28        FUNC(NUL_IN_HEADER, FATAL) \
  29        FUNC(UNTERMINATED_HEADER, FATAL) \
  30        /* errors */ \
  31        FUNC(BAD_DATE, ERROR) \
  32        FUNC(BAD_DATE_OVERFLOW, ERROR) \
  33        FUNC(BAD_EMAIL, ERROR) \
  34        FUNC(BAD_NAME, ERROR) \
  35        FUNC(BAD_OBJECT_SHA1, ERROR) \
  36        FUNC(BAD_PARENT_SHA1, ERROR) \
  37        FUNC(BAD_TAG_OBJECT, ERROR) \
  38        FUNC(BAD_TIMEZONE, ERROR) \
  39        FUNC(BAD_TREE, ERROR) \
  40        FUNC(BAD_TREE_SHA1, ERROR) \
  41        FUNC(BAD_TYPE, ERROR) \
  42        FUNC(DUPLICATE_ENTRIES, ERROR) \
  43        FUNC(MISSING_AUTHOR, ERROR) \
  44        FUNC(MISSING_COMMITTER, ERROR) \
  45        FUNC(MISSING_EMAIL, ERROR) \
  46        FUNC(MISSING_GRAFT, ERROR) \
  47        FUNC(MISSING_NAME_BEFORE_EMAIL, ERROR) \
  48        FUNC(MISSING_OBJECT, ERROR) \
  49        FUNC(MISSING_PARENT, ERROR) \
  50        FUNC(MISSING_SPACE_BEFORE_DATE, ERROR) \
  51        FUNC(MISSING_SPACE_BEFORE_EMAIL, ERROR) \
  52        FUNC(MISSING_TAG, ERROR) \
  53        FUNC(MISSING_TAG_ENTRY, ERROR) \
  54        FUNC(MISSING_TAG_OBJECT, ERROR) \
  55        FUNC(MISSING_TREE, ERROR) \
  56        FUNC(MISSING_TREE_OBJECT, ERROR) \
  57        FUNC(MISSING_TYPE, ERROR) \
  58        FUNC(MISSING_TYPE_ENTRY, ERROR) \
  59        FUNC(MULTIPLE_AUTHORS, ERROR) \
  60        FUNC(TAG_OBJECT_NOT_TAG, ERROR) \
  61        FUNC(TREE_NOT_SORTED, ERROR) \
  62        FUNC(UNKNOWN_TYPE, ERROR) \
  63        FUNC(ZERO_PADDED_DATE, ERROR) \
  64        FUNC(GITMODULES_MISSING, ERROR) \
  65        FUNC(GITMODULES_BLOB, ERROR) \
  66        FUNC(GITMODULES_LARGE, ERROR) \
  67        FUNC(GITMODULES_NAME, ERROR) \
  68        FUNC(GITMODULES_SYMLINK, ERROR) \
  69        FUNC(GITMODULES_URL, ERROR) \
  70        FUNC(GITMODULES_PATH, ERROR) \
  71        /* warnings */ \
  72        FUNC(BAD_FILEMODE, WARN) \
  73        FUNC(EMPTY_NAME, WARN) \
  74        FUNC(FULL_PATHNAME, WARN) \
  75        FUNC(HAS_DOT, WARN) \
  76        FUNC(HAS_DOTDOT, WARN) \
  77        FUNC(HAS_DOTGIT, WARN) \
  78        FUNC(NULL_SHA1, WARN) \
  79        FUNC(ZERO_PADDED_FILEMODE, WARN) \
  80        FUNC(NUL_IN_COMMIT, WARN) \
  81        /* infos (reported as warnings, but ignored by default) */ \
  82        FUNC(GITMODULES_PARSE, INFO) \
  83        FUNC(BAD_TAG_NAME, INFO) \
  84        FUNC(MISSING_TAGGER_ENTRY, INFO)
  85
  86#define MSG_ID(id, msg_type) FSCK_MSG_##id,
  87enum fsck_msg_id {
  88        FOREACH_MSG_ID(MSG_ID)
  89        FSCK_MSG_MAX
  90};
  91#undef MSG_ID
  92
  93#define STR(x) #x
  94#define MSG_ID(id, msg_type) { STR(id), NULL, NULL, FSCK_##msg_type },
  95static struct {
  96        const char *id_string;
  97        const char *downcased;
  98        const char *camelcased;
  99        int msg_type;
 100} msg_id_info[FSCK_MSG_MAX + 1] = {
 101        FOREACH_MSG_ID(MSG_ID)
 102        { NULL, NULL, NULL, -1 }
 103};
 104#undef MSG_ID
 105
 106static void prepare_msg_ids(void)
 107{
 108        int i;
 109
 110        if (msg_id_info[0].downcased)
 111                return;
 112
 113        /* convert id_string to lower case, without underscores. */
 114        for (i = 0; i < FSCK_MSG_MAX; i++) {
 115                const char *p = msg_id_info[i].id_string;
 116                int len = strlen(p);
 117                char *q = xmalloc(len);
 118
 119                msg_id_info[i].downcased = q;
 120                while (*p)
 121                        if (*p == '_')
 122                                p++;
 123                        else
 124                                *(q)++ = tolower(*(p)++);
 125                *q = '\0';
 126
 127                p = msg_id_info[i].id_string;
 128                q = xmalloc(len);
 129                msg_id_info[i].camelcased = q;
 130                while (*p) {
 131                        if (*p == '_') {
 132                                p++;
 133                                if (*p)
 134                                        *q++ = *p++;
 135                        } else {
 136                                *q++ = tolower(*p++);
 137                        }
 138                }
 139                *q = '\0';
 140        }
 141}
 142
 143static int parse_msg_id(const char *text)
 144{
 145        int i;
 146
 147        prepare_msg_ids();
 148
 149        for (i = 0; i < FSCK_MSG_MAX; i++)
 150                if (!strcmp(text, msg_id_info[i].downcased))
 151                        return i;
 152
 153        return -1;
 154}
 155
 156void list_config_fsck_msg_ids(struct string_list *list, const char *prefix)
 157{
 158        int i;
 159
 160        prepare_msg_ids();
 161
 162        for (i = 0; i < FSCK_MSG_MAX; i++)
 163                list_config_item(list, prefix, msg_id_info[i].camelcased);
 164}
 165
 166static int fsck_msg_type(enum fsck_msg_id msg_id,
 167        struct fsck_options *options)
 168{
 169        int msg_type;
 170
 171        assert(msg_id >= 0 && msg_id < FSCK_MSG_MAX);
 172
 173        if (options->msg_type)
 174                msg_type = options->msg_type[msg_id];
 175        else {
 176                msg_type = msg_id_info[msg_id].msg_type;
 177                if (options->strict && msg_type == FSCK_WARN)
 178                        msg_type = FSCK_ERROR;
 179        }
 180
 181        return msg_type;
 182}
 183
 184static int parse_msg_type(const char *str)
 185{
 186        if (!strcmp(str, "error"))
 187                return FSCK_ERROR;
 188        else if (!strcmp(str, "warn"))
 189                return FSCK_WARN;
 190        else if (!strcmp(str, "ignore"))
 191                return FSCK_IGNORE;
 192        else
 193                die("Unknown fsck message type: '%s'", str);
 194}
 195
 196int is_valid_msg_type(const char *msg_id, const char *msg_type)
 197{
 198        if (parse_msg_id(msg_id) < 0)
 199                return 0;
 200        parse_msg_type(msg_type);
 201        return 1;
 202}
 203
 204void fsck_set_msg_type(struct fsck_options *options,
 205                const char *msg_id, const char *msg_type)
 206{
 207        int id = parse_msg_id(msg_id), type;
 208
 209        if (id < 0)
 210                die("Unhandled message id: %s", msg_id);
 211        type = parse_msg_type(msg_type);
 212
 213        if (type != FSCK_ERROR && msg_id_info[id].msg_type == FSCK_FATAL)
 214                die("Cannot demote %s to %s", msg_id, msg_type);
 215
 216        if (!options->msg_type) {
 217                int i;
 218                int *msg_type;
 219                ALLOC_ARRAY(msg_type, FSCK_MSG_MAX);
 220                for (i = 0; i < FSCK_MSG_MAX; i++)
 221                        msg_type[i] = fsck_msg_type(i, options);
 222                options->msg_type = msg_type;
 223        }
 224
 225        options->msg_type[id] = type;
 226}
 227
 228void fsck_set_msg_types(struct fsck_options *options, const char *values)
 229{
 230        char *buf = xstrdup(values), *to_free = buf;
 231        int done = 0;
 232
 233        while (!done) {
 234                int len = strcspn(buf, " ,|"), equal;
 235
 236                done = !buf[len];
 237                if (!len) {
 238                        buf++;
 239                        continue;
 240                }
 241                buf[len] = '\0';
 242
 243                for (equal = 0;
 244                     equal < len && buf[equal] != '=' && buf[equal] != ':';
 245                     equal++)
 246                        buf[equal] = tolower(buf[equal]);
 247                buf[equal] = '\0';
 248
 249                if (!strcmp(buf, "skiplist")) {
 250                        if (equal == len)
 251                                die("skiplist requires a path");
 252                        oidset_parse_file(&options->skiplist, buf + equal + 1);
 253                        buf += len + 1;
 254                        continue;
 255                }
 256
 257                if (equal == len)
 258                        die("Missing '=': '%s'", buf);
 259
 260                fsck_set_msg_type(options, buf, buf + equal + 1);
 261                buf += len + 1;
 262        }
 263        free(to_free);
 264}
 265
 266static void append_msg_id(struct strbuf *sb, const char *msg_id)
 267{
 268        for (;;) {
 269                char c = *(msg_id)++;
 270
 271                if (!c)
 272                        break;
 273                if (c != '_')
 274                        strbuf_addch(sb, tolower(c));
 275                else {
 276                        assert(*msg_id);
 277                        strbuf_addch(sb, *(msg_id)++);
 278                }
 279        }
 280
 281        strbuf_addstr(sb, ": ");
 282}
 283
 284static int object_on_skiplist(struct fsck_options *opts, struct object *obj)
 285{
 286        return opts && obj && oidset_contains(&opts->skiplist, &obj->oid);
 287}
 288
 289__attribute__((format (printf, 4, 5)))
 290static int report(struct fsck_options *options, struct object *object,
 291        enum fsck_msg_id id, const char *fmt, ...)
 292{
 293        va_list ap;
 294        struct strbuf sb = STRBUF_INIT;
 295        int msg_type = fsck_msg_type(id, options), result;
 296
 297        if (msg_type == FSCK_IGNORE)
 298                return 0;
 299
 300        if (object_on_skiplist(options, object))
 301                return 0;
 302
 303        if (msg_type == FSCK_FATAL)
 304                msg_type = FSCK_ERROR;
 305        else if (msg_type == FSCK_INFO)
 306                msg_type = FSCK_WARN;
 307
 308        append_msg_id(&sb, msg_id_info[id].id_string);
 309
 310        va_start(ap, fmt);
 311        strbuf_vaddf(&sb, fmt, ap);
 312        result = options->error_func(options, object, msg_type, sb.buf);
 313        strbuf_release(&sb);
 314        va_end(ap);
 315
 316        return result;
 317}
 318
 319static char *get_object_name(struct fsck_options *options, struct object *obj)
 320{
 321        if (!options->object_names)
 322                return NULL;
 323        return lookup_decoration(options->object_names, obj);
 324}
 325
 326static void put_object_name(struct fsck_options *options, struct object *obj,
 327        const char *fmt, ...)
 328{
 329        va_list ap;
 330        struct strbuf buf = STRBUF_INIT;
 331        char *existing;
 332
 333        if (!options->object_names)
 334                return;
 335        existing = lookup_decoration(options->object_names, obj);
 336        if (existing)
 337                return;
 338        va_start(ap, fmt);
 339        strbuf_vaddf(&buf, fmt, ap);
 340        add_decoration(options->object_names, obj, strbuf_detach(&buf, NULL));
 341        va_end(ap);
 342}
 343
 344static const char *describe_object(struct fsck_options *o, struct object *obj)
 345{
 346        static struct strbuf buf = STRBUF_INIT;
 347        char *name;
 348
 349        strbuf_reset(&buf);
 350        strbuf_addstr(&buf, oid_to_hex(&obj->oid));
 351        if (o->object_names && (name = lookup_decoration(o->object_names, obj)))
 352                strbuf_addf(&buf, " (%s)", name);
 353
 354        return buf.buf;
 355}
 356
 357static int fsck_walk_tree(struct tree *tree, void *data, struct fsck_options *options)
 358{
 359        struct tree_desc desc;
 360        struct name_entry entry;
 361        int res = 0;
 362        const char *name;
 363
 364        if (parse_tree(tree))
 365                return -1;
 366
 367        name = get_object_name(options, &tree->object);
 368        if (init_tree_desc_gently(&desc, tree->buffer, tree->size))
 369                return -1;
 370        while (tree_entry_gently(&desc, &entry)) {
 371                struct object *obj;
 372                int result;
 373
 374                if (S_ISGITLINK(entry.mode))
 375                        continue;
 376
 377                if (S_ISDIR(entry.mode)) {
 378                        obj = (struct object *)lookup_tree(the_repository, &entry.oid);
 379                        if (name && obj)
 380                                put_object_name(options, obj, "%s%s/", name,
 381                                        entry.path);
 382                        result = options->walk(obj, OBJ_TREE, data, options);
 383                }
 384                else if (S_ISREG(entry.mode) || S_ISLNK(entry.mode)) {
 385                        obj = (struct object *)lookup_blob(the_repository, &entry.oid);
 386                        if (name && obj)
 387                                put_object_name(options, obj, "%s%s", name,
 388                                        entry.path);
 389                        result = options->walk(obj, OBJ_BLOB, data, options);
 390                }
 391                else {
 392                        result = error("in tree %s: entry %s has bad mode %.6o",
 393                                        describe_object(options, &tree->object), entry.path, entry.mode);
 394                }
 395                if (result < 0)
 396                        return result;
 397                if (!res)
 398                        res = result;
 399        }
 400        return res;
 401}
 402
 403static int fsck_walk_commit(struct commit *commit, void *data, struct fsck_options *options)
 404{
 405        int counter = 0, generation = 0, name_prefix_len = 0;
 406        struct commit_list *parents;
 407        int res;
 408        int result;
 409        const char *name;
 410
 411        if (parse_commit(commit))
 412                return -1;
 413
 414        name = get_object_name(options, &commit->object);
 415        if (name)
 416                put_object_name(options, &get_commit_tree(commit)->object,
 417                                "%s:", name);
 418
 419        result = options->walk((struct object *)get_commit_tree(commit),
 420                               OBJ_TREE, data, options);
 421        if (result < 0)
 422                return result;
 423        res = result;
 424
 425        parents = commit->parents;
 426        if (name && parents) {
 427                int len = strlen(name), power;
 428
 429                if (len && name[len - 1] == '^') {
 430                        generation = 1;
 431                        name_prefix_len = len - 1;
 432                }
 433                else { /* parse ~<generation> suffix */
 434                        for (generation = 0, power = 1;
 435                             len && isdigit(name[len - 1]);
 436                             power *= 10)
 437                                generation += power * (name[--len] - '0');
 438                        if (power > 1 && len && name[len - 1] == '~')
 439                                name_prefix_len = len - 1;
 440                }
 441        }
 442
 443        while (parents) {
 444                if (name) {
 445                        struct object *obj = &parents->item->object;
 446
 447                        if (counter++)
 448                                put_object_name(options, obj, "%s^%d",
 449                                        name, counter);
 450                        else if (generation > 0)
 451                                put_object_name(options, obj, "%.*s~%d",
 452                                        name_prefix_len, name, generation + 1);
 453                        else
 454                                put_object_name(options, obj, "%s^", name);
 455                }
 456                result = options->walk((struct object *)parents->item, OBJ_COMMIT, data, options);
 457                if (result < 0)
 458                        return result;
 459                if (!res)
 460                        res = result;
 461                parents = parents->next;
 462        }
 463        return res;
 464}
 465
 466static int fsck_walk_tag(struct tag *tag, void *data, struct fsck_options *options)
 467{
 468        char *name = get_object_name(options, &tag->object);
 469
 470        if (parse_tag(tag))
 471                return -1;
 472        if (name)
 473                put_object_name(options, tag->tagged, "%s", name);
 474        return options->walk(tag->tagged, OBJ_ANY, data, options);
 475}
 476
 477int fsck_walk(struct object *obj, void *data, struct fsck_options *options)
 478{
 479        if (!obj)
 480                return -1;
 481
 482        if (obj->type == OBJ_NONE)
 483                parse_object(the_repository, &obj->oid);
 484
 485        switch (obj->type) {
 486        case OBJ_BLOB:
 487                return 0;
 488        case OBJ_TREE:
 489                return fsck_walk_tree((struct tree *)obj, data, options);
 490        case OBJ_COMMIT:
 491                return fsck_walk_commit((struct commit *)obj, data, options);
 492        case OBJ_TAG:
 493                return fsck_walk_tag((struct tag *)obj, data, options);
 494        default:
 495                error("Unknown object type for %s", describe_object(options, obj));
 496                return -1;
 497        }
 498}
 499
 500/*
 501 * The entries in a tree are ordered in the _path_ order,
 502 * which means that a directory entry is ordered by adding
 503 * a slash to the end of it.
 504 *
 505 * So a directory called "a" is ordered _after_ a file
 506 * called "a.c", because "a/" sorts after "a.c".
 507 */
 508#define TREE_UNORDERED (-1)
 509#define TREE_HAS_DUPS  (-2)
 510
 511static int verify_ordered(unsigned mode1, const char *name1, unsigned mode2, const char *name2)
 512{
 513        int len1 = strlen(name1);
 514        int len2 = strlen(name2);
 515        int len = len1 < len2 ? len1 : len2;
 516        unsigned char c1, c2;
 517        int cmp;
 518
 519        cmp = memcmp(name1, name2, len);
 520        if (cmp < 0)
 521                return 0;
 522        if (cmp > 0)
 523                return TREE_UNORDERED;
 524
 525        /*
 526         * Ok, the first <len> characters are the same.
 527         * Now we need to order the next one, but turn
 528         * a '\0' into a '/' for a directory entry.
 529         */
 530        c1 = name1[len];
 531        c2 = name2[len];
 532        if (!c1 && !c2)
 533                /*
 534                 * git-write-tree used to write out a nonsense tree that has
 535                 * entries with the same name, one blob and one tree.  Make
 536                 * sure we do not have duplicate entries.
 537                 */
 538                return TREE_HAS_DUPS;
 539        if (!c1 && S_ISDIR(mode1))
 540                c1 = '/';
 541        if (!c2 && S_ISDIR(mode2))
 542                c2 = '/';
 543        return c1 < c2 ? 0 : TREE_UNORDERED;
 544}
 545
 546static int fsck_tree(struct tree *item, struct fsck_options *options)
 547{
 548        int retval = 0;
 549        int has_null_sha1 = 0;
 550        int has_full_path = 0;
 551        int has_empty_name = 0;
 552        int has_dot = 0;
 553        int has_dotdot = 0;
 554        int has_dotgit = 0;
 555        int has_zero_pad = 0;
 556        int has_bad_modes = 0;
 557        int has_dup_entries = 0;
 558        int not_properly_sorted = 0;
 559        struct tree_desc desc;
 560        unsigned o_mode;
 561        const char *o_name;
 562
 563        if (init_tree_desc_gently(&desc, item->buffer, item->size)) {
 564                retval += report(options, &item->object, FSCK_MSG_BAD_TREE, "cannot be parsed as a tree");
 565                return retval;
 566        }
 567
 568        o_mode = 0;
 569        o_name = NULL;
 570
 571        while (desc.size) {
 572                unsigned short mode;
 573                const char *name;
 574                const struct object_id *oid;
 575
 576                oid = tree_entry_extract(&desc, &name, &mode);
 577
 578                has_null_sha1 |= is_null_oid(oid);
 579                has_full_path |= !!strchr(name, '/');
 580                has_empty_name |= !*name;
 581                has_dot |= !strcmp(name, ".");
 582                has_dotdot |= !strcmp(name, "..");
 583                has_dotgit |= is_hfs_dotgit(name) || is_ntfs_dotgit(name);
 584                has_zero_pad |= *(char *)desc.buffer == '0';
 585
 586                if (is_hfs_dotgitmodules(name) || is_ntfs_dotgitmodules(name)) {
 587                        if (!S_ISLNK(mode))
 588                                oidset_insert(&gitmodules_found, oid);
 589                        else
 590                                retval += report(options, &item->object,
 591                                                 FSCK_MSG_GITMODULES_SYMLINK,
 592                                                 ".gitmodules is a symbolic link");
 593                }
 594
 595                if (update_tree_entry_gently(&desc)) {
 596                        retval += report(options, &item->object, FSCK_MSG_BAD_TREE, "cannot be parsed as a tree");
 597                        break;
 598                }
 599
 600                switch (mode) {
 601                /*
 602                 * Standard modes..
 603                 */
 604                case S_IFREG | 0755:
 605                case S_IFREG | 0644:
 606                case S_IFLNK:
 607                case S_IFDIR:
 608                case S_IFGITLINK:
 609                        break;
 610                /*
 611                 * This is nonstandard, but we had a few of these
 612                 * early on when we honored the full set of mode
 613                 * bits..
 614                 */
 615                case S_IFREG | 0664:
 616                        if (!options->strict)
 617                                break;
 618                        /* fallthrough */
 619                default:
 620                        has_bad_modes = 1;
 621                }
 622
 623                if (o_name) {
 624                        switch (verify_ordered(o_mode, o_name, mode, name)) {
 625                        case TREE_UNORDERED:
 626                                not_properly_sorted = 1;
 627                                break;
 628                        case TREE_HAS_DUPS:
 629                                has_dup_entries = 1;
 630                                break;
 631                        default:
 632                                break;
 633                        }
 634                }
 635
 636                o_mode = mode;
 637                o_name = name;
 638        }
 639
 640        if (has_null_sha1)
 641                retval += report(options, &item->object, FSCK_MSG_NULL_SHA1, "contains entries pointing to null sha1");
 642        if (has_full_path)
 643                retval += report(options, &item->object, FSCK_MSG_FULL_PATHNAME, "contains full pathnames");
 644        if (has_empty_name)
 645                retval += report(options, &item->object, FSCK_MSG_EMPTY_NAME, "contains empty pathname");
 646        if (has_dot)
 647                retval += report(options, &item->object, FSCK_MSG_HAS_DOT, "contains '.'");
 648        if (has_dotdot)
 649                retval += report(options, &item->object, FSCK_MSG_HAS_DOTDOT, "contains '..'");
 650        if (has_dotgit)
 651                retval += report(options, &item->object, FSCK_MSG_HAS_DOTGIT, "contains '.git'");
 652        if (has_zero_pad)
 653                retval += report(options, &item->object, FSCK_MSG_ZERO_PADDED_FILEMODE, "contains zero-padded file modes");
 654        if (has_bad_modes)
 655                retval += report(options, &item->object, FSCK_MSG_BAD_FILEMODE, "contains bad file modes");
 656        if (has_dup_entries)
 657                retval += report(options, &item->object, FSCK_MSG_DUPLICATE_ENTRIES, "contains duplicate file entries");
 658        if (not_properly_sorted)
 659                retval += report(options, &item->object, FSCK_MSG_TREE_NOT_SORTED, "not properly sorted");
 660        return retval;
 661}
 662
 663static int verify_headers(const void *data, unsigned long size,
 664                          struct object *obj, struct fsck_options *options)
 665{
 666        const char *buffer = (const char *)data;
 667        unsigned long i;
 668
 669        for (i = 0; i < size; i++) {
 670                switch (buffer[i]) {
 671                case '\0':
 672                        return report(options, obj,
 673                                FSCK_MSG_NUL_IN_HEADER,
 674                                "unterminated header: NUL at offset %ld", i);
 675                case '\n':
 676                        if (i + 1 < size && buffer[i + 1] == '\n')
 677                                return 0;
 678                }
 679        }
 680
 681        /*
 682         * We did not find double-LF that separates the header
 683         * and the body.  Not having a body is not a crime but
 684         * we do want to see the terminating LF for the last header
 685         * line.
 686         */
 687        if (size && buffer[size - 1] == '\n')
 688                return 0;
 689
 690        return report(options, obj,
 691                FSCK_MSG_UNTERMINATED_HEADER, "unterminated header");
 692}
 693
 694static int fsck_ident(const char **ident, struct object *obj, struct fsck_options *options)
 695{
 696        const char *p = *ident;
 697        char *end;
 698
 699        *ident = strchrnul(*ident, '\n');
 700        if (**ident == '\n')
 701                (*ident)++;
 702
 703        if (*p == '<')
 704                return report(options, obj, FSCK_MSG_MISSING_NAME_BEFORE_EMAIL, "invalid author/committer line - missing space before email");
 705        p += strcspn(p, "<>\n");
 706        if (*p == '>')
 707                return report(options, obj, FSCK_MSG_BAD_NAME, "invalid author/committer line - bad name");
 708        if (*p != '<')
 709                return report(options, obj, FSCK_MSG_MISSING_EMAIL, "invalid author/committer line - missing email");
 710        if (p[-1] != ' ')
 711                return report(options, obj, FSCK_MSG_MISSING_SPACE_BEFORE_EMAIL, "invalid author/committer line - missing space before email");
 712        p++;
 713        p += strcspn(p, "<>\n");
 714        if (*p != '>')
 715                return report(options, obj, FSCK_MSG_BAD_EMAIL, "invalid author/committer line - bad email");
 716        p++;
 717        if (*p != ' ')
 718                return report(options, obj, FSCK_MSG_MISSING_SPACE_BEFORE_DATE, "invalid author/committer line - missing space before date");
 719        p++;
 720        if (*p == '0' && p[1] != ' ')
 721                return report(options, obj, FSCK_MSG_ZERO_PADDED_DATE, "invalid author/committer line - zero-padded date");
 722        if (date_overflows(parse_timestamp(p, &end, 10)))
 723                return report(options, obj, FSCK_MSG_BAD_DATE_OVERFLOW, "invalid author/committer line - date causes integer overflow");
 724        if ((end == p || *end != ' '))
 725                return report(options, obj, FSCK_MSG_BAD_DATE, "invalid author/committer line - bad date");
 726        p = end + 1;
 727        if ((*p != '+' && *p != '-') ||
 728            !isdigit(p[1]) ||
 729            !isdigit(p[2]) ||
 730            !isdigit(p[3]) ||
 731            !isdigit(p[4]) ||
 732            (p[5] != '\n'))
 733                return report(options, obj, FSCK_MSG_BAD_TIMEZONE, "invalid author/committer line - bad time zone");
 734        p += 6;
 735        return 0;
 736}
 737
 738static int fsck_commit_buffer(struct commit *commit, const char *buffer,
 739        unsigned long size, struct fsck_options *options)
 740{
 741        struct object_id tree_oid, oid;
 742        struct commit_graft *graft;
 743        unsigned parent_count, parent_line_count = 0, author_count;
 744        int err;
 745        const char *buffer_begin = buffer;
 746        const char *p;
 747
 748        if (verify_headers(buffer, size, &commit->object, options))
 749                return -1;
 750
 751        if (!skip_prefix(buffer, "tree ", &buffer))
 752                return report(options, &commit->object, FSCK_MSG_MISSING_TREE, "invalid format - expected 'tree' line");
 753        if (parse_oid_hex(buffer, &tree_oid, &p) || *p != '\n') {
 754                err = report(options, &commit->object, FSCK_MSG_BAD_TREE_SHA1, "invalid 'tree' line format - bad sha1");
 755                if (err)
 756                        return err;
 757        }
 758        buffer = p + 1;
 759        while (skip_prefix(buffer, "parent ", &buffer)) {
 760                if (parse_oid_hex(buffer, &oid, &p) || *p != '\n') {
 761                        err = report(options, &commit->object, FSCK_MSG_BAD_PARENT_SHA1, "invalid 'parent' line format - bad sha1");
 762                        if (err)
 763                                return err;
 764                }
 765                buffer = p + 1;
 766                parent_line_count++;
 767        }
 768        graft = lookup_commit_graft(the_repository, &commit->object.oid);
 769        parent_count = commit_list_count(commit->parents);
 770        if (graft) {
 771                if (graft->nr_parent == -1 && !parent_count)
 772                        ; /* shallow commit */
 773                else if (graft->nr_parent != parent_count) {
 774                        err = report(options, &commit->object, FSCK_MSG_MISSING_GRAFT, "graft objects missing");
 775                        if (err)
 776                                return err;
 777                }
 778        } else {
 779                if (parent_count != parent_line_count) {
 780                        err = report(options, &commit->object, FSCK_MSG_MISSING_PARENT, "parent objects missing");
 781                        if (err)
 782                                return err;
 783                }
 784        }
 785        author_count = 0;
 786        while (skip_prefix(buffer, "author ", &buffer)) {
 787                author_count++;
 788                err = fsck_ident(&buffer, &commit->object, options);
 789                if (err)
 790                        return err;
 791        }
 792        if (author_count < 1)
 793                err = report(options, &commit->object, FSCK_MSG_MISSING_AUTHOR, "invalid format - expected 'author' line");
 794        else if (author_count > 1)
 795                err = report(options, &commit->object, FSCK_MSG_MULTIPLE_AUTHORS, "invalid format - multiple 'author' lines");
 796        if (err)
 797                return err;
 798        if (!skip_prefix(buffer, "committer ", &buffer))
 799                return report(options, &commit->object, FSCK_MSG_MISSING_COMMITTER, "invalid format - expected 'committer' line");
 800        err = fsck_ident(&buffer, &commit->object, options);
 801        if (err)
 802                return err;
 803        if (!get_commit_tree(commit)) {
 804                err = report(options, &commit->object, FSCK_MSG_BAD_TREE, "could not load commit's tree %s", oid_to_hex(&tree_oid));
 805                if (err)
 806                        return err;
 807        }
 808        if (memchr(buffer_begin, '\0', size)) {
 809                err = report(options, &commit->object, FSCK_MSG_NUL_IN_COMMIT,
 810                             "NUL byte in the commit object body");
 811                if (err)
 812                        return err;
 813        }
 814        return 0;
 815}
 816
 817static int fsck_commit(struct commit *commit, const char *data,
 818        unsigned long size, struct fsck_options *options)
 819{
 820        const char *buffer = data ?  data : get_commit_buffer(commit, &size);
 821        int ret = fsck_commit_buffer(commit, buffer, size, options);
 822        if (!data)
 823                unuse_commit_buffer(commit, buffer);
 824        return ret;
 825}
 826
 827static int fsck_tag_buffer(struct tag *tag, const char *data,
 828        unsigned long size, struct fsck_options *options)
 829{
 830        struct object_id oid;
 831        int ret = 0;
 832        const char *buffer;
 833        char *to_free = NULL, *eol;
 834        struct strbuf sb = STRBUF_INIT;
 835        const char *p;
 836
 837        if (data)
 838                buffer = data;
 839        else {
 840                enum object_type type;
 841
 842                buffer = to_free =
 843                        read_object_file(&tag->object.oid, &type, &size);
 844                if (!buffer)
 845                        return report(options, &tag->object,
 846                                FSCK_MSG_MISSING_TAG_OBJECT,
 847                                "cannot read tag object");
 848
 849                if (type != OBJ_TAG) {
 850                        ret = report(options, &tag->object,
 851                                FSCK_MSG_TAG_OBJECT_NOT_TAG,
 852                                "expected tag got %s",
 853                            type_name(type));
 854                        goto done;
 855                }
 856        }
 857
 858        ret = verify_headers(buffer, size, &tag->object, options);
 859        if (ret)
 860                goto done;
 861
 862        if (!skip_prefix(buffer, "object ", &buffer)) {
 863                ret = report(options, &tag->object, FSCK_MSG_MISSING_OBJECT, "invalid format - expected 'object' line");
 864                goto done;
 865        }
 866        if (parse_oid_hex(buffer, &oid, &p) || *p != '\n') {
 867                ret = report(options, &tag->object, FSCK_MSG_BAD_OBJECT_SHA1, "invalid 'object' line format - bad sha1");
 868                if (ret)
 869                        goto done;
 870        }
 871        buffer = p + 1;
 872
 873        if (!skip_prefix(buffer, "type ", &buffer)) {
 874                ret = report(options, &tag->object, FSCK_MSG_MISSING_TYPE_ENTRY, "invalid format - expected 'type' line");
 875                goto done;
 876        }
 877        eol = strchr(buffer, '\n');
 878        if (!eol) {
 879                ret = report(options, &tag->object, FSCK_MSG_MISSING_TYPE, "invalid format - unexpected end after 'type' line");
 880                goto done;
 881        }
 882        if (type_from_string_gently(buffer, eol - buffer, 1) < 0)
 883                ret = report(options, &tag->object, FSCK_MSG_BAD_TYPE, "invalid 'type' value");
 884        if (ret)
 885                goto done;
 886        buffer = eol + 1;
 887
 888        if (!skip_prefix(buffer, "tag ", &buffer)) {
 889                ret = report(options, &tag->object, FSCK_MSG_MISSING_TAG_ENTRY, "invalid format - expected 'tag' line");
 890                goto done;
 891        }
 892        eol = strchr(buffer, '\n');
 893        if (!eol) {
 894                ret = report(options, &tag->object, FSCK_MSG_MISSING_TAG, "invalid format - unexpected end after 'type' line");
 895                goto done;
 896        }
 897        strbuf_addf(&sb, "refs/tags/%.*s", (int)(eol - buffer), buffer);
 898        if (check_refname_format(sb.buf, 0)) {
 899                ret = report(options, &tag->object, FSCK_MSG_BAD_TAG_NAME,
 900                           "invalid 'tag' name: %.*s",
 901                           (int)(eol - buffer), buffer);
 902                if (ret)
 903                        goto done;
 904        }
 905        buffer = eol + 1;
 906
 907        if (!skip_prefix(buffer, "tagger ", &buffer)) {
 908                /* early tags do not contain 'tagger' lines; warn only */
 909                ret = report(options, &tag->object, FSCK_MSG_MISSING_TAGGER_ENTRY, "invalid format - expected 'tagger' line");
 910                if (ret)
 911                        goto done;
 912        }
 913        else
 914                ret = fsck_ident(&buffer, &tag->object, options);
 915
 916done:
 917        strbuf_release(&sb);
 918        free(to_free);
 919        return ret;
 920}
 921
 922static int fsck_tag(struct tag *tag, const char *data,
 923        unsigned long size, struct fsck_options *options)
 924{
 925        struct object *tagged = tag->tagged;
 926
 927        if (!tagged)
 928                return report(options, &tag->object, FSCK_MSG_BAD_TAG_OBJECT, "could not load tagged object");
 929
 930        return fsck_tag_buffer(tag, data, size, options);
 931}
 932
/*
 * Context handed to fsck_gitmodules_fn() through the config parser's
 * callback-data pointer while a .gitmodules blob is being checked.
 */
struct fsck_gitmodules_data {
        struct object *obj;             /* the blob the reports are filed against */
        struct fsck_options *options;   /* options passed on to report() */
        int ret;                        /* OR of the report() results so far */
};
 938
 939static int fsck_gitmodules_fn(const char *var, const char *value, void *vdata)
 940{
 941        struct fsck_gitmodules_data *data = vdata;
 942        const char *subsection, *key;
 943        int subsection_len;
 944        char *name;
 945
 946        if (parse_config_key(var, "submodule", &subsection, &subsection_len, &key) < 0 ||
 947            !subsection)
 948                return 0;
 949
 950        name = xmemdupz(subsection, subsection_len);
 951        if (check_submodule_name(name) < 0)
 952                data->ret |= report(data->options, data->obj,
 953                                    FSCK_MSG_GITMODULES_NAME,
 954                                    "disallowed submodule name: %s",
 955                                    name);
 956        if (!strcmp(key, "url") && value &&
 957            looks_like_command_line_option(value))
 958                data->ret |= report(data->options, data->obj,
 959                                    FSCK_MSG_GITMODULES_URL,
 960                                    "disallowed submodule url: %s",
 961                                    value);
 962        if (!strcmp(key, "path") && value &&
 963            looks_like_command_line_option(value))
 964                data->ret |= report(data->options, data->obj,
 965                                    FSCK_MSG_GITMODULES_PATH,
 966                                    "disallowed submodule path: %s",
 967                                    value);
 968        free(name);
 969
 970        return 0;
 971}
 972
 973static int fsck_blob(struct blob *blob, const char *buf,
 974                     unsigned long size, struct fsck_options *options)
 975{
 976        struct fsck_gitmodules_data data;
 977        struct config_options config_opts = { 0 };
 978
 979        if (!oidset_contains(&gitmodules_found, &blob->object.oid))
 980                return 0;
 981        oidset_insert(&gitmodules_done, &blob->object.oid);
 982
 983        if (object_on_skiplist(options, &blob->object))
 984                return 0;
 985
 986        if (!buf) {
 987                /*
 988                 * A missing buffer here is a sign that the caller found the
 989                 * blob too gigantic to load into memory. Let's just consider
 990                 * that an error.
 991                 */
 992                return report(options, &blob->object,
 993                              FSCK_MSG_GITMODULES_LARGE,
 994                              ".gitmodules too large to parse");
 995        }
 996
 997        data.obj = &blob->object;
 998        data.options = options;
 999        data.ret = 0;
1000        config_opts.error_action = CONFIG_ERROR_SILENT;
1001        if (git_config_from_mem(fsck_gitmodules_fn, CONFIG_ORIGIN_BLOB,
1002                                ".gitmodules", buf, size, &data, &config_opts))
1003                data.ret |= report(options, &blob->object,
1004                                   FSCK_MSG_GITMODULES_PARSE,
1005                                   "could not parse gitmodules blob");
1006
1007        return data.ret;
1008}
1009
1010int fsck_object(struct object *obj, void *data, unsigned long size,
1011        struct fsck_options *options)
1012{
1013        if (!obj)
1014                return report(options, obj, FSCK_MSG_BAD_OBJECT_SHA1, "no valid object to fsck");
1015
1016        if (obj->type == OBJ_BLOB)
1017                return fsck_blob((struct blob *)obj, data, size, options);
1018        if (obj->type == OBJ_TREE)
1019                return fsck_tree((struct tree *) obj, options);
1020        if (obj->type == OBJ_COMMIT)
1021                return fsck_commit((struct commit *) obj, (const char *) data,
1022                        size, options);
1023        if (obj->type == OBJ_TAG)
1024                return fsck_tag((struct tag *) obj, (const char *) data,
1025                        size, options);
1026
1027        return report(options, obj, FSCK_MSG_UNKNOWN_TYPE, "unknown type '%d' (internal fsck error)",
1028                          obj->type);
1029}
1030
1031int fsck_error_function(struct fsck_options *o,
1032        struct object *obj, int msg_type, const char *message)
1033{
1034        if (msg_type == FSCK_WARN) {
1035                warning("object %s: %s", describe_object(o, obj), message);
1036                return 0;
1037        }
1038        error("object %s: %s", describe_object(o, obj), message);
1039        return 1;
1040}
1041
1042int fsck_finish(struct fsck_options *options)
1043{
1044        int ret = 0;
1045        struct oidset_iter iter;
1046        const struct object_id *oid;
1047
1048        oidset_iter_init(&gitmodules_found, &iter);
1049        while ((oid = oidset_iter_next(&iter))) {
1050                struct blob *blob;
1051                enum object_type type;
1052                unsigned long size;
1053                char *buf;
1054
1055                if (oidset_contains(&gitmodules_done, oid))
1056                        continue;
1057
1058                blob = lookup_blob(the_repository, oid);
1059                if (!blob) {
1060                        struct object *obj = lookup_unknown_object(oid);
1061                        ret |= report(options, obj,
1062                                      FSCK_MSG_GITMODULES_BLOB,
1063                                      "non-blob found at .gitmodules");
1064                        continue;
1065                }
1066
1067                buf = read_object_file(oid, &type, &size);
1068                if (!buf) {
1069                        if (is_promisor_object(&blob->object.oid))
1070                                continue;
1071                        ret |= report(options, &blob->object,
1072                                      FSCK_MSG_GITMODULES_MISSING,
1073                                      "unable to read .gitmodules blob");
1074                        continue;
1075                }
1076
1077                if (type == OBJ_BLOB)
1078                        ret |= fsck_blob(blob, buf, size, options);
1079                else
1080                        ret |= report(options, &blob->object,
1081                                      FSCK_MSG_GITMODULES_BLOB,
1082                                      "non-blob found at .gitmodules");
1083                free(buf);
1084        }
1085
1086
1087        oidset_clear(&gitmodules_found);
1088        oidset_clear(&gitmodules_done);
1089        return ret;
1090}