95604c4a63b88f7935a8bfb890cbdbee7e396ffe
   1/*
   2 * GIT - The information manager from hell
   3 *
   4 * Copyright (C) Linus Torvalds, 2005
   5 */
   6#include "cache.h"
   7#include "builtin.h"
   8#include "parse-options.h"
   9#include "userdiff.h"
  10#include "streaming.h"
  11#include "tree-walk.h"
  12
  13struct batch_options {
  14        int enabled;
  15        int follow_symlinks;
  16        int print_contents;
  17        int buffer_output;
  18        int all_objects;
  19        const char *format;
  20};
  21
  22static int cat_one_file(int opt, const char *exp_type, const char *obj_name,
  23                        int unknown_type)
  24{
  25        unsigned char sha1[20];
  26        enum object_type type;
  27        char *buf;
  28        unsigned long size;
  29        struct object_context obj_context;
  30        struct object_info oi = {NULL};
  31        struct strbuf sb = STRBUF_INIT;
  32        unsigned flags = LOOKUP_REPLACE_OBJECT;
  33
  34        if (unknown_type)
  35                flags |= LOOKUP_UNKNOWN_OBJECT;
  36
  37        if (get_sha1_with_context(obj_name, 0, sha1, &obj_context))
  38                die("Not a valid object name %s", obj_name);
  39
  40        buf = NULL;
  41        switch (opt) {
  42        case 't':
  43                oi.typename = &sb;
  44                if (sha1_object_info_extended(sha1, &oi, flags) < 0)
  45                        die("git cat-file: could not get object info");
  46                if (sb.len) {
  47                        printf("%s\n", sb.buf);
  48                        strbuf_release(&sb);
  49                        return 0;
  50                }
  51                break;
  52
  53        case 's':
  54                oi.sizep = &size;
  55                if (sha1_object_info_extended(sha1, &oi, flags) < 0)
  56                        die("git cat-file: could not get object info");
  57                printf("%lu\n", size);
  58                return 0;
  59
  60        case 'e':
  61                return !has_sha1_file(sha1);
  62
  63        case 'c':
  64                if (!obj_context.path[0])
  65                        die("git cat-file --textconv %s: <object> must be <sha1:path>",
  66                            obj_name);
  67
  68                if (textconv_object(obj_context.path, obj_context.mode, sha1, 1, &buf, &size))
  69                        break;
  70
  71        case 'p':
  72                type = sha1_object_info(sha1, NULL);
  73                if (type < 0)
  74                        die("Not a valid object name %s", obj_name);
  75
  76                /* custom pretty-print here */
  77                if (type == OBJ_TREE) {
  78                        const char *ls_args[3] = { NULL };
  79                        ls_args[0] =  "ls-tree";
  80                        ls_args[1] =  obj_name;
  81                        return cmd_ls_tree(2, ls_args, NULL);
  82                }
  83
  84                if (type == OBJ_BLOB)
  85                        return stream_blob_to_fd(1, sha1, NULL, 0);
  86                buf = read_sha1_file(sha1, &type, &size);
  87                if (!buf)
  88                        die("Cannot read object %s", obj_name);
  89
  90                /* otherwise just spit out the data */
  91                break;
  92
  93        case 0:
  94                if (type_from_string(exp_type) == OBJ_BLOB) {
  95                        unsigned char blob_sha1[20];
  96                        if (sha1_object_info(sha1, NULL) == OBJ_TAG) {
  97                                char *buffer = read_sha1_file(sha1, &type, &size);
  98                                const char *target;
  99                                if (!skip_prefix(buffer, "object ", &target) ||
 100                                    get_sha1_hex(target, blob_sha1))
 101                                        die("%s not a valid tag", sha1_to_hex(sha1));
 102                                free(buffer);
 103                        } else
 104                                hashcpy(blob_sha1, sha1);
 105
 106                        if (sha1_object_info(blob_sha1, NULL) == OBJ_BLOB)
 107                                return stream_blob_to_fd(1, blob_sha1, NULL, 0);
 108                        /*
 109                         * we attempted to dereference a tag to a blob
 110                         * and failed; there may be new dereference
 111                         * mechanisms this code is not aware of.
 112                         * fall-back to the usual case.
 113                         */
 114                }
 115                buf = read_object_with_reference(sha1, exp_type, &size, NULL);
 116                break;
 117
 118        default:
 119                die("git cat-file: unknown option: %s", exp_type);
 120        }
 121
 122        if (!buf)
 123                die("git cat-file %s: bad file", obj_name);
 124
 125        write_or_die(1, buf, size);
 126        return 0;
 127}
 128
 129struct expand_data {
 130        unsigned char sha1[20];
 131        enum object_type type;
 132        unsigned long size;
 133        unsigned long disk_size;
 134        const char *rest;
 135        unsigned char delta_base_sha1[20];
 136
 137        /*
 138         * If mark_query is true, we do not expand anything, but rather
 139         * just mark the object_info with items we wish to query.
 140         */
 141        int mark_query;
 142
 143        /*
 144         * Whether to split the input on whitespace before feeding it to
 145         * get_sha1; this is decided during the mark_query phase based on
 146         * whether we have a %(rest) token in our format.
 147         */
 148        int split_on_whitespace;
 149
 150        /*
 151         * After a mark_query run, this object_info is set up to be
 152         * passed to sha1_object_info_extended. It will point to the data
 153         * elements above, so you can retrieve the response from there.
 154         */
 155        struct object_info info;
 156};
 157
 158static int is_atom(const char *atom, const char *s, int slen)
 159{
 160        int alen = strlen(atom);
 161        return alen == slen && !memcmp(atom, s, alen);
 162}
 163
 164static void expand_atom(struct strbuf *sb, const char *atom, int len,
 165                        void *vdata)
 166{
 167        struct expand_data *data = vdata;
 168
 169        if (is_atom("objectname", atom, len)) {
 170                if (!data->mark_query)
 171                        strbuf_addstr(sb, sha1_to_hex(data->sha1));
 172        } else if (is_atom("objecttype", atom, len)) {
 173                if (data->mark_query)
 174                        data->info.typep = &data->type;
 175                else
 176                        strbuf_addstr(sb, typename(data->type));
 177        } else if (is_atom("objectsize", atom, len)) {
 178                if (data->mark_query)
 179                        data->info.sizep = &data->size;
 180                else
 181                        strbuf_addf(sb, "%lu", data->size);
 182        } else if (is_atom("objectsize:disk", atom, len)) {
 183                if (data->mark_query)
 184                        data->info.disk_sizep = &data->disk_size;
 185                else
 186                        strbuf_addf(sb, "%lu", data->disk_size);
 187        } else if (is_atom("rest", atom, len)) {
 188                if (data->mark_query)
 189                        data->split_on_whitespace = 1;
 190                else if (data->rest)
 191                        strbuf_addstr(sb, data->rest);
 192        } else if (is_atom("deltabase", atom, len)) {
 193                if (data->mark_query)
 194                        data->info.delta_base_sha1 = data->delta_base_sha1;
 195                else
 196                        strbuf_addstr(sb, sha1_to_hex(data->delta_base_sha1));
 197        } else
 198                die("unknown format element: %.*s", len, atom);
 199}
 200
 201static size_t expand_format(struct strbuf *sb, const char *start, void *data)
 202{
 203        const char *end;
 204
 205        if (*start != '(')
 206                return 0;
 207        end = strchr(start + 1, ')');
 208        if (!end)
 209                die("format element '%s' does not end in ')'", start);
 210
 211        expand_atom(sb, start + 1, end - start - 1, data);
 212
 213        return end - start + 1;
 214}
 215
 216static void batch_write(struct batch_options *opt, const void *data, int len)
 217{
 218        if (opt->buffer_output) {
 219                if (fwrite(data, 1, len, stdout) != len)
 220                        die_errno("unable to write to stdout");
 221        } else
 222                write_or_die(1, data, len);
 223}
 224
 225static void print_object_or_die(struct batch_options *opt, struct expand_data *data)
 226{
 227        const unsigned char *sha1 = data->sha1;
 228
 229        assert(data->info.typep);
 230
 231        if (data->type == OBJ_BLOB) {
 232                if (opt->buffer_output)
 233                        fflush(stdout);
 234                if (stream_blob_to_fd(1, sha1, NULL, 0) < 0)
 235                        die("unable to stream %s to stdout", sha1_to_hex(sha1));
 236        }
 237        else {
 238                enum object_type type;
 239                unsigned long size;
 240                void *contents;
 241
 242                contents = read_sha1_file(sha1, &type, &size);
 243                if (!contents)
 244                        die("object %s disappeared", sha1_to_hex(sha1));
 245                if (type != data->type)
 246                        die("object %s changed type!?", sha1_to_hex(sha1));
 247                if (data->info.sizep && size != data->size)
 248                        die("object %s changed size!?", sha1_to_hex(sha1));
 249
 250                batch_write(opt, contents, size);
 251                free(contents);
 252        }
 253}
 254
 255static void batch_object_write(const char *obj_name, struct batch_options *opt,
 256                               struct expand_data *data)
 257{
 258        struct strbuf buf = STRBUF_INIT;
 259
 260        if (sha1_object_info_extended(data->sha1, &data->info, LOOKUP_REPLACE_OBJECT) < 0) {
 261                printf("%s missing\n", obj_name ? obj_name : sha1_to_hex(data->sha1));
 262                fflush(stdout);
 263                return;
 264        }
 265
 266        strbuf_expand(&buf, opt->format, expand_format, data);
 267        strbuf_addch(&buf, '\n');
 268        batch_write(opt, buf.buf, buf.len);
 269        strbuf_release(&buf);
 270
 271        if (opt->print_contents) {
 272                print_object_or_die(opt, data);
 273                batch_write(opt, "\n", 1);
 274        }
 275}
 276
 277static void batch_one_object(const char *obj_name, struct batch_options *opt,
 278                             struct expand_data *data)
 279{
 280        struct object_context ctx;
 281        int flags = opt->follow_symlinks ? GET_SHA1_FOLLOW_SYMLINKS : 0;
 282        enum follow_symlinks_result result;
 283
 284        result = get_sha1_with_context(obj_name, flags, data->sha1, &ctx);
 285        if (result != FOUND) {
 286                switch (result) {
 287                case MISSING_OBJECT:
 288                        printf("%s missing\n", obj_name);
 289                        break;
 290                case DANGLING_SYMLINK:
 291                        printf("dangling %"PRIuMAX"\n%s\n",
 292                               (uintmax_t)strlen(obj_name), obj_name);
 293                        break;
 294                case SYMLINK_LOOP:
 295                        printf("loop %"PRIuMAX"\n%s\n",
 296                               (uintmax_t)strlen(obj_name), obj_name);
 297                        break;
 298                case NOT_DIR:
 299                        printf("notdir %"PRIuMAX"\n%s\n",
 300                               (uintmax_t)strlen(obj_name), obj_name);
 301                        break;
 302                default:
 303                        die("BUG: unknown get_sha1_with_context result %d\n",
 304                               result);
 305                        break;
 306                }
 307                fflush(stdout);
 308                return;
 309        }
 310
 311        if (ctx.mode == 0) {
 312                printf("symlink %"PRIuMAX"\n%s\n",
 313                       (uintmax_t)ctx.symlink_path.len,
 314                       ctx.symlink_path.buf);
 315                fflush(stdout);
 316                return;
 317        }
 318
 319        batch_object_write(obj_name, opt, data);
 320}
 321
 322struct object_cb_data {
 323        struct batch_options *opt;
 324        struct expand_data *expand;
 325};
 326
 327static int batch_object_cb(const unsigned char *sha1,
 328                           struct object_cb_data *data)
 329{
 330        hashcpy(data->expand->sha1, sha1);
 331        batch_object_write(NULL, data->opt, data->expand);
 332        return 0;
 333}
 334
 335static int batch_loose_object(const unsigned char *sha1,
 336                              const char *path,
 337                              void *data)
 338{
 339        return batch_object_cb(sha1, data);
 340}
 341
 342static int batch_packed_object(const unsigned char *sha1,
 343                               struct packed_git *pack,
 344                               uint32_t pos,
 345                               void *data)
 346{
 347        return batch_object_cb(sha1, data);
 348}
 349
 350static int batch_objects(struct batch_options *opt)
 351{
 352        struct strbuf buf = STRBUF_INIT;
 353        struct expand_data data;
 354        int save_warning;
 355        int retval = 0;
 356
 357        if (!opt->format)
 358                opt->format = "%(objectname) %(objecttype) %(objectsize)";
 359
 360        /*
 361         * Expand once with our special mark_query flag, which will prime the
 362         * object_info to be handed to sha1_object_info_extended for each
 363         * object.
 364         */
 365        memset(&data, 0, sizeof(data));
 366        data.mark_query = 1;
 367        strbuf_expand(&buf, opt->format, expand_format, &data);
 368        data.mark_query = 0;
 369
 370        /*
 371         * If we are printing out the object, then always fill in the type,
 372         * since we will want to decide whether or not to stream.
 373         */
 374        if (opt->print_contents)
 375                data.info.typep = &data.type;
 376
 377        if (opt->all_objects) {
 378                struct object_cb_data cb;
 379                cb.opt = opt;
 380                cb.expand = &data;
 381                for_each_loose_object(batch_loose_object, &cb, 0);
 382                for_each_packed_object(batch_packed_object, &cb, 0);
 383                return 0;
 384        }
 385
 386        /*
 387         * We are going to call get_sha1 on a potentially very large number of
 388         * objects. In most large cases, these will be actual object sha1s. The
 389         * cost to double-check that each one is not also a ref (just so we can
 390         * warn) ends up dwarfing the actual cost of the object lookups
 391         * themselves. We can work around it by just turning off the warning.
 392         */
 393        save_warning = warn_on_object_refname_ambiguity;
 394        warn_on_object_refname_ambiguity = 0;
 395
 396        while (strbuf_getline(&buf, stdin, '\n') != EOF) {
 397                if (data.split_on_whitespace) {
 398                        /*
 399                         * Split at first whitespace, tying off the beginning
 400                         * of the string and saving the remainder (or NULL) in
 401                         * data.rest.
 402                         */
 403                        char *p = strpbrk(buf.buf, " \t");
 404                        if (p) {
 405                                while (*p && strchr(" \t", *p))
 406                                        *p++ = '\0';
 407                        }
 408                        data.rest = p;
 409                }
 410
 411                batch_one_object(buf.buf, opt, &data);
 412        }
 413
 414        strbuf_release(&buf);
 415        warn_on_object_refname_ambiguity = save_warning;
 416        return retval;
 417}
 418
 419static const char * const cat_file_usage[] = {
 420        N_("git cat-file (-t [--allow-unknown-type]|-s [--allow-unknown-type]|-e|-p|<type>|--textconv) <object>"),
 421        N_("git cat-file (--batch | --batch-check) [--follow-symlinks] < <list-of-objects>"),
 422        NULL
 423};
 424
 425static int git_cat_file_config(const char *var, const char *value, void *cb)
 426{
 427        if (userdiff_config(var, value) < 0)
 428                return -1;
 429
 430        return git_default_config(var, value, cb);
 431}
 432
 433static int batch_option_callback(const struct option *opt,
 434                                 const char *arg,
 435                                 int unset)
 436{
 437        struct batch_options *bo = opt->value;
 438
 439        if (bo->enabled) {
 440                return 1;
 441        }
 442
 443        bo->enabled = 1;
 444        bo->print_contents = !strcmp(opt->long_name, "batch");
 445        bo->format = arg;
 446
 447        return 0;
 448}
 449
 450int cmd_cat_file(int argc, const char **argv, const char *prefix)
 451{
 452        int opt = 0;
 453        const char *exp_type = NULL, *obj_name = NULL;
 454        struct batch_options batch = {0};
 455        int unknown_type = 0;
 456
 457        const struct option options[] = {
 458                OPT_GROUP(N_("<type> can be one of: blob, tree, commit, tag")),
 459                OPT_CMDMODE('t', NULL, &opt, N_("show object type"), 't'),
 460                OPT_CMDMODE('s', NULL, &opt, N_("show object size"), 's'),
 461                OPT_CMDMODE('e', NULL, &opt,
 462                            N_("exit with zero when there's no error"), 'e'),
 463                OPT_CMDMODE('p', NULL, &opt, N_("pretty-print object's content"), 'p'),
 464                OPT_CMDMODE(0, "textconv", &opt,
 465                            N_("for blob objects, run textconv on object's content"), 'c'),
 466                OPT_BOOL(0, "allow-unknown-type", &unknown_type,
 467                          N_("allow -s and -t to work with broken/corrupt objects")),
 468                OPT_BOOL(0, "buffer", &batch.buffer_output, N_("buffer --batch output")),
 469                { OPTION_CALLBACK, 0, "batch", &batch, "format",
 470                        N_("show info and content of objects fed from the standard input"),
 471                        PARSE_OPT_OPTARG, batch_option_callback },
 472                { OPTION_CALLBACK, 0, "batch-check", &batch, "format",
 473                        N_("show info about objects fed from the standard input"),
 474                        PARSE_OPT_OPTARG, batch_option_callback },
 475                OPT_BOOL(0, "follow-symlinks", &batch.follow_symlinks,
 476                         N_("follow in-tree symlinks (used with --batch or --batch-check)")),
 477                OPT_BOOL(0, "batch-all-objects", &batch.all_objects,
 478                         N_("show all objects with --batch or --batch-check")),
 479                OPT_END()
 480        };
 481
 482        git_config(git_cat_file_config, NULL);
 483
 484        argc = parse_options(argc, argv, prefix, options, cat_file_usage, 0);
 485
 486        if (opt) {
 487                if (argc == 1)
 488                        obj_name = argv[0];
 489                else
 490                        usage_with_options(cat_file_usage, options);
 491        }
 492        if (!opt && !batch.enabled) {
 493                if (argc == 2) {
 494                        exp_type = argv[0];
 495                        obj_name = argv[1];
 496                } else
 497                        usage_with_options(cat_file_usage, options);
 498        }
 499        if (batch.enabled && (opt || argc)) {
 500                usage_with_options(cat_file_usage, options);
 501        }
 502
 503        if ((batch.follow_symlinks || batch.all_objects) && !batch.enabled) {
 504                usage_with_options(cat_file_usage, options);
 505        }
 506
 507        if (batch.enabled)
 508                return batch_objects(&batch);
 509
 510        if (unknown_type && opt != 't' && opt != 's')
 511                die("git cat-file --allow-unknown-type: use with -s or -t");
 512        return cat_one_file(opt, exp_type, obj_name, unknown_type);
 513}