builtin/cat-file.c on commit pack-objects: use streaming interface for reading large loose blobs (cf2ba13)
   1/*
   2 * GIT - The information manager from hell
   3 *
   4 * Copyright (C) Linus Torvalds, 2005
   5 */
   6#include "cache.h"
   7#include "exec_cmd.h"
   8#include "tag.h"
   9#include "tree.h"
  10#include "builtin.h"
  11#include "parse-options.h"
  12#include "diff.h"
  13#include "userdiff.h"
  14#include "streaming.h"
  15
  16#define BATCH 1
  17#define BATCH_CHECK 2
  18
  19static void pprint_tag(const unsigned char *sha1, const char *buf, unsigned long size)
  20{
  21        /* the parser in tag.c is useless here. */
  22        const char *endp = buf + size;
  23        const char *cp = buf;
  24
  25        while (cp < endp) {
  26                char c = *cp++;
  27                if (c != '\n')
  28                        continue;
  29                if (7 <= endp - cp && !memcmp("tagger ", cp, 7)) {
  30                        const char *tagger = cp;
  31
  32                        /* Found the tagger line.  Copy out the contents
  33                         * of the buffer so far.
  34                         */
  35                        write_or_die(1, buf, cp - buf);
  36
  37                        /*
  38                         * Do something intelligent, like pretty-printing
  39                         * the date.
  40                         */
  41                        while (cp < endp) {
  42                                if (*cp++ == '\n') {
  43                                        /* tagger to cp is a line
  44                                         * that has ident and time.
  45                                         */
  46                                        const char *sp = tagger;
  47                                        char *ep;
  48                                        unsigned long date;
  49                                        long tz;
  50                                        while (sp < cp && *sp != '>')
  51                                                sp++;
  52                                        if (sp == cp) {
  53                                                /* give up */
  54                                                write_or_die(1, tagger,
  55                                                             cp - tagger);
  56                                                break;
  57                                        }
  58                                        while (sp < cp &&
  59                                               !('0' <= *sp && *sp <= '9'))
  60                                                sp++;
  61                                        write_or_die(1, tagger, sp - tagger);
  62                                        date = strtoul(sp, &ep, 10);
  63                                        tz = strtol(ep, NULL, 10);
  64                                        sp = show_date(date, tz, 0);
  65                                        write_or_die(1, sp, strlen(sp));
  66                                        xwrite(1, "\n", 1);
  67                                        break;
  68                                }
  69                        }
  70                        break;
  71                }
  72                if (cp < endp && *cp == '\n')
  73                        /* end of header */
  74                        break;
  75        }
  76        /* At this point, we have copied out the header up to the end of
  77         * the tagger line and cp points at one past \n.  It could be the
  78         * next header line after the tagger line, or it could be another
  79         * \n that marks the end of the headers.  We need to copy out the
  80         * remainder as is.
  81         */
  82        if (cp < endp)
  83                write_or_die(1, cp, endp - cp);
  84}
  85
  86static int cat_one_file(int opt, const char *exp_type, const char *obj_name)
  87{
  88        unsigned char sha1[20];
  89        enum object_type type;
  90        char *buf;
  91        unsigned long size;
  92        struct object_context obj_context;
  93
  94        if (get_sha1_with_context(obj_name, sha1, &obj_context))
  95                die("Not a valid object name %s", obj_name);
  96
  97        buf = NULL;
  98        switch (opt) {
  99        case 't':
 100                type = sha1_object_info(sha1, NULL);
 101                if (type > 0) {
 102                        printf("%s\n", typename(type));
 103                        return 0;
 104                }
 105                break;
 106
 107        case 's':
 108                type = sha1_object_info(sha1, &size);
 109                if (type > 0) {
 110                        printf("%lu\n", size);
 111                        return 0;
 112                }
 113                break;
 114
 115        case 'e':
 116                return !has_sha1_file(sha1);
 117
 118        case 'p':
 119                type = sha1_object_info(sha1, NULL);
 120                if (type < 0)
 121                        die("Not a valid object name %s", obj_name);
 122
 123                /* custom pretty-print here */
 124                if (type == OBJ_TREE) {
 125                        const char *ls_args[3] = { NULL };
 126                        ls_args[0] =  "ls-tree";
 127                        ls_args[1] =  obj_name;
 128                        return cmd_ls_tree(2, ls_args, NULL);
 129                }
 130
 131                if (type == OBJ_BLOB)
 132                        return stream_blob_to_fd(1, sha1, NULL, 0);
 133                buf = read_sha1_file(sha1, &type, &size);
 134                if (!buf)
 135                        die("Cannot read object %s", obj_name);
 136                if (type == OBJ_TAG) {
 137                        pprint_tag(sha1, buf, size);
 138                        return 0;
 139                }
 140
 141                /* otherwise just spit out the data */
 142                break;
 143
 144        case 'c':
 145                if (!obj_context.path[0])
 146                        die("git cat-file --textconv %s: <object> must be <sha1:path>",
 147                            obj_name);
 148
 149                if (!textconv_object(obj_context.path, obj_context.mode, sha1, &buf, &size))
 150                        die("git cat-file --textconv: unable to run textconv on %s",
 151                            obj_name);
 152                break;
 153
 154        case 0:
 155                if (type_from_string(exp_type) == OBJ_BLOB) {
 156                        unsigned char blob_sha1[20];
 157                        if (sha1_object_info(sha1, NULL) == OBJ_TAG) {
 158                                enum object_type type;
 159                                unsigned long size;
 160                                char *buffer = read_sha1_file(sha1, &type, &size);
 161                                if (memcmp(buffer, "object ", 7) ||
 162                                    get_sha1_hex(buffer + 7, blob_sha1))
 163                                        die("%s not a valid tag", sha1_to_hex(sha1));
 164                                free(buffer);
 165                        } else
 166                                hashcpy(blob_sha1, sha1);
 167
 168                        if (sha1_object_info(blob_sha1, NULL) == OBJ_BLOB)
 169                                return stream_blob_to_fd(1, blob_sha1, NULL, 0);
 170                        /*
 171                         * we attempted to dereference a tag to a blob
 172                         * and failed; there may be new dereference
 173                         * mechanisms this code is not aware of.
 174                         * fall-back to the usual case.
 175                         */
 176                }
 177                buf = read_object_with_reference(sha1, exp_type, &size, NULL);
 178                break;
 179
 180        default:
 181                die("git cat-file: unknown option: %s", exp_type);
 182        }
 183
 184        if (!buf)
 185                die("git cat-file %s: bad file", obj_name);
 186
 187        write_or_die(1, buf, size);
 188        return 0;
 189}
 190
 191static int batch_one_object(const char *obj_name, int print_contents)
 192{
 193        unsigned char sha1[20];
 194        enum object_type type = 0;
 195        unsigned long size;
 196        void *contents = contents;
 197
 198        if (!obj_name)
 199           return 1;
 200
 201        if (get_sha1(obj_name, sha1)) {
 202                printf("%s missing\n", obj_name);
 203                fflush(stdout);
 204                return 0;
 205        }
 206
 207        if (print_contents == BATCH)
 208                contents = read_sha1_file(sha1, &type, &size);
 209        else
 210                type = sha1_object_info(sha1, &size);
 211
 212        if (type <= 0) {
 213                printf("%s missing\n", obj_name);
 214                fflush(stdout);
 215                if (print_contents == BATCH)
 216                        free(contents);
 217                return 0;
 218        }
 219
 220        printf("%s %s %lu\n", sha1_to_hex(sha1), typename(type), size);
 221        fflush(stdout);
 222
 223        if (print_contents == BATCH) {
 224                write_or_die(1, contents, size);
 225                printf("\n");
 226                fflush(stdout);
 227                free(contents);
 228        }
 229
 230        return 0;
 231}
 232
 233static int batch_objects(int print_contents)
 234{
 235        struct strbuf buf = STRBUF_INIT;
 236
 237        while (strbuf_getline(&buf, stdin, '\n') != EOF) {
 238                int error = batch_one_object(buf.buf, print_contents);
 239                if (error)
 240                        return error;
 241        }
 242
 243        return 0;
 244}
 245
/*
 * NULL-terminated list of synopsis lines, handed to parse_options() and
 * usage_with_options() by cmd_cat_file().
 */
static const char * const cat_file_usage[] = {
        "git cat-file (-t|-s|-e|-p|<type>|--textconv) <object>",
        "git cat-file (--batch|--batch-check) < <list_of_objects>",
        NULL
};
 251
 252static int git_cat_file_config(const char *var, const char *value, void *cb)
 253{
 254        if (userdiff_config(var, value) < 0)
 255                return -1;
 256
 257        return git_default_config(var, value, cb);
 258}
 259
 260int cmd_cat_file(int argc, const char **argv, const char *prefix)
 261{
 262        int opt = 0, batch = 0;
 263        const char *exp_type = NULL, *obj_name = NULL;
 264
 265        const struct option options[] = {
 266                OPT_GROUP("<type> can be one of: blob, tree, commit, tag"),
 267                OPT_SET_INT('t', NULL, &opt, "show object type", 't'),
 268                OPT_SET_INT('s', NULL, &opt, "show object size", 's'),
 269                OPT_SET_INT('e', NULL, &opt,
 270                            "exit with zero when there's no error", 'e'),
 271                OPT_SET_INT('p', NULL, &opt, "pretty-print object's content", 'p'),
 272                OPT_SET_INT(0, "textconv", &opt,
 273                            "for blob objects, run textconv on object's content", 'c'),
 274                OPT_SET_INT(0, "batch", &batch,
 275                            "show info and content of objects fed from the standard input",
 276                            BATCH),
 277                OPT_SET_INT(0, "batch-check", &batch,
 278                            "show info about objects fed from the standard input",
 279                            BATCH_CHECK),
 280                OPT_END()
 281        };
 282
 283        git_config(git_cat_file_config, NULL);
 284
 285        if (argc != 3 && argc != 2)
 286                usage_with_options(cat_file_usage, options);
 287
 288        argc = parse_options(argc, argv, prefix, options, cat_file_usage, 0);
 289
 290        if (opt) {
 291                if (argc == 1)
 292                        obj_name = argv[0];
 293                else
 294                        usage_with_options(cat_file_usage, options);
 295        }
 296        if (!opt && !batch) {
 297                if (argc == 2) {
 298                        exp_type = argv[0];
 299                        obj_name = argv[1];
 300                } else
 301                        usage_with_options(cat_file_usage, options);
 302        }
 303        if (batch && (opt || argc)) {
 304                usage_with_options(cat_file_usage, options);
 305        }
 306
 307        if (batch)
 308                return batch_objects(batch);
 309
 310        return cat_one_file(opt, exp_type, obj_name);
 311}