builtin/am.c on commit builtin-am: extract patch and commit info with git-mailinfo (3e20dcf)
   1/*
   2 * Builtin "git am"
   3 *
   4 * Based on git-am.sh by Junio C Hamano.
   5 */
   6#include "cache.h"
   7#include "builtin.h"
   8#include "exec_cmd.h"
   9#include "parse-options.h"
  10#include "dir.h"
  11#include "run-command.h"
  12#include "quote.h"
  13
  14/**
  15 * Returns 1 if the file is empty or does not exist, 0 otherwise.
  16 */
  17static int is_empty_file(const char *filename)
  18{
  19        struct stat st;
  20
  21        if (stat(filename, &st) < 0) {
  22                if (errno == ENOENT)
  23                        return 1;
  24                die_errno(_("could not stat %s"), filename);
  25        }
  26
  27        return !st.st_size;
  28}
  29
  30/**
  31 * Like strbuf_getline(), but treats both '\n' and "\r\n" as line terminators.
  32 */
  33static int strbuf_getline_crlf(struct strbuf *sb, FILE *fp)
  34{
  35        if (strbuf_getwholeline(sb, fp, '\n'))
  36                return EOF;
  37        if (sb->buf[sb->len - 1] == '\n') {
  38                strbuf_setlen(sb, sb->len - 1);
  39                if (sb->len > 0 && sb->buf[sb->len - 1] == '\r')
  40                        strbuf_setlen(sb, sb->len - 1);
  41        }
  42        return 0;
  43}
  44
  45enum patch_format {
  46        PATCH_FORMAT_UNKNOWN = 0,
  47        PATCH_FORMAT_MBOX
  48};
  49
  50struct am_state {
  51        /* state directory path */
  52        char *dir;
  53
  54        /* current and last patch numbers, 1-indexed */
  55        int cur;
  56        int last;
  57
  58        /* commit metadata and message */
  59        char *author_name;
  60        char *author_email;
  61        char *author_date;
  62        char *msg;
  63        size_t msg_len;
  64
  65        /* number of digits in patch filename */
  66        int prec;
  67};
  68
  69/**
  70 * Initializes am_state with the default values. The state directory is set to
  71 * dir.
  72 */
  73static void am_state_init(struct am_state *state, const char *dir)
  74{
  75        memset(state, 0, sizeof(*state));
  76
  77        assert(dir);
  78        state->dir = xstrdup(dir);
  79
  80        state->prec = 4;
  81}
  82
  83/**
  84 * Releases memory allocated by an am_state.
  85 */
  86static void am_state_release(struct am_state *state)
  87{
  88        free(state->dir);
  89        free(state->author_name);
  90        free(state->author_email);
  91        free(state->author_date);
  92        free(state->msg);
  93}
  94
  95/**
  96 * Returns path relative to the am_state directory.
  97 */
  98static inline const char *am_path(const struct am_state *state, const char *path)
  99{
 100        return mkpath("%s/%s", state->dir, path);
 101}
 102
 103/**
 104 * Returns 1 if there is an am session in progress, 0 otherwise.
 105 */
 106static int am_in_progress(const struct am_state *state)
 107{
 108        struct stat st;
 109
 110        if (lstat(state->dir, &st) < 0 || !S_ISDIR(st.st_mode))
 111                return 0;
 112        if (lstat(am_path(state, "last"), &st) || !S_ISREG(st.st_mode))
 113                return 0;
 114        if (lstat(am_path(state, "next"), &st) || !S_ISREG(st.st_mode))
 115                return 0;
 116        return 1;
 117}
 118
 119/**
 120 * Reads the contents of `file` in the `state` directory into `sb`. Returns the
 121 * number of bytes read on success, -1 if the file does not exist. If `trim` is
 122 * set, trailing whitespace will be removed.
 123 */
 124static int read_state_file(struct strbuf *sb, const struct am_state *state,
 125                        const char *file, int trim)
 126{
 127        strbuf_reset(sb);
 128
 129        if (strbuf_read_file(sb, am_path(state, file), 0) >= 0) {
 130                if (trim)
 131                        strbuf_trim(sb);
 132
 133                return sb->len;
 134        }
 135
 136        if (errno == ENOENT)
 137                return -1;
 138
 139        die_errno(_("could not read '%s'"), am_path(state, file));
 140}
 141
 142/**
 143 * Reads a KEY=VALUE shell variable assignment from `fp`, returning the VALUE
 144 * as a newly-allocated string. VALUE must be a quoted string, and the KEY must
 145 * match `key`. Returns NULL on failure.
 146 *
 147 * This is used by read_author_script() to read the GIT_AUTHOR_* variables from
 148 * the author-script.
 149 */
 150static char *read_shell_var(FILE *fp, const char *key)
 151{
 152        struct strbuf sb = STRBUF_INIT;
 153        const char *str;
 154
 155        if (strbuf_getline(&sb, fp, '\n'))
 156                goto fail;
 157
 158        if (!skip_prefix(sb.buf, key, &str))
 159                goto fail;
 160
 161        if (!skip_prefix(str, "=", &str))
 162                goto fail;
 163
 164        strbuf_remove(&sb, 0, str - sb.buf);
 165
 166        str = sq_dequote(sb.buf);
 167        if (!str)
 168                goto fail;
 169
 170        return strbuf_detach(&sb, NULL);
 171
 172fail:
 173        strbuf_release(&sb);
 174        return NULL;
 175}
 176
 177/**
 178 * Reads and parses the state directory's "author-script" file, and sets
 179 * state->author_name, state->author_email and state->author_date accordingly.
 180 * Returns 0 on success, -1 if the file could not be parsed.
 181 *
 182 * The author script is of the format:
 183 *
 184 *      GIT_AUTHOR_NAME='$author_name'
 185 *      GIT_AUTHOR_EMAIL='$author_email'
 186 *      GIT_AUTHOR_DATE='$author_date'
 187 *
 188 * where $author_name, $author_email and $author_date are quoted. We are strict
 189 * with our parsing, as the file was meant to be eval'd in the old git-am.sh
 190 * script, and thus if the file differs from what this function expects, it is
 191 * better to bail out than to do something that the user does not expect.
 192 */
 193static int read_author_script(struct am_state *state)
 194{
 195        const char *filename = am_path(state, "author-script");
 196        FILE *fp;
 197
 198        assert(!state->author_name);
 199        assert(!state->author_email);
 200        assert(!state->author_date);
 201
 202        fp = fopen(filename, "r");
 203        if (!fp) {
 204                if (errno == ENOENT)
 205                        return 0;
 206                die_errno(_("could not open '%s' for reading"), filename);
 207        }
 208
 209        state->author_name = read_shell_var(fp, "GIT_AUTHOR_NAME");
 210        if (!state->author_name) {
 211                fclose(fp);
 212                return -1;
 213        }
 214
 215        state->author_email = read_shell_var(fp, "GIT_AUTHOR_EMAIL");
 216        if (!state->author_email) {
 217                fclose(fp);
 218                return -1;
 219        }
 220
 221        state->author_date = read_shell_var(fp, "GIT_AUTHOR_DATE");
 222        if (!state->author_date) {
 223                fclose(fp);
 224                return -1;
 225        }
 226
 227        if (fgetc(fp) != EOF) {
 228                fclose(fp);
 229                return -1;
 230        }
 231
 232        fclose(fp);
 233        return 0;
 234}
 235
 236/**
 237 * Saves state->author_name, state->author_email and state->author_date in the
 238 * state directory's "author-script" file.
 239 */
 240static void write_author_script(const struct am_state *state)
 241{
 242        struct strbuf sb = STRBUF_INIT;
 243
 244        strbuf_addstr(&sb, "GIT_AUTHOR_NAME=");
 245        sq_quote_buf(&sb, state->author_name);
 246        strbuf_addch(&sb, '\n');
 247
 248        strbuf_addstr(&sb, "GIT_AUTHOR_EMAIL=");
 249        sq_quote_buf(&sb, state->author_email);
 250        strbuf_addch(&sb, '\n');
 251
 252        strbuf_addstr(&sb, "GIT_AUTHOR_DATE=");
 253        sq_quote_buf(&sb, state->author_date);
 254        strbuf_addch(&sb, '\n');
 255
 256        write_file(am_path(state, "author-script"), 1, "%s", sb.buf);
 257
 258        strbuf_release(&sb);
 259}
 260
 261/**
 262 * Reads the commit message from the state directory's "final-commit" file,
 263 * setting state->msg to its contents and state->msg_len to the length of its
 264 * contents in bytes.
 265 *
 266 * Returns 0 on success, -1 if the file does not exist.
 267 */
 268static int read_commit_msg(struct am_state *state)
 269{
 270        struct strbuf sb = STRBUF_INIT;
 271
 272        assert(!state->msg);
 273
 274        if (read_state_file(&sb, state, "final-commit", 0) < 0) {
 275                strbuf_release(&sb);
 276                return -1;
 277        }
 278
 279        state->msg = strbuf_detach(&sb, &state->msg_len);
 280        return 0;
 281}
 282
 283/**
 284 * Saves state->msg in the state directory's "final-commit" file.
 285 */
 286static void write_commit_msg(const struct am_state *state)
 287{
 288        int fd;
 289        const char *filename = am_path(state, "final-commit");
 290
 291        fd = xopen(filename, O_WRONLY | O_CREAT, 0666);
 292        if (write_in_full(fd, state->msg, state->msg_len) < 0)
 293                die_errno(_("could not write to %s"), filename);
 294        close(fd);
 295}
 296
 297/**
 298 * Loads state from disk.
 299 */
 300static void am_load(struct am_state *state)
 301{
 302        struct strbuf sb = STRBUF_INIT;
 303
 304        if (read_state_file(&sb, state, "next", 1) < 0)
 305                die("BUG: state file 'next' does not exist");
 306        state->cur = strtol(sb.buf, NULL, 10);
 307
 308        if (read_state_file(&sb, state, "last", 1) < 0)
 309                die("BUG: state file 'last' does not exist");
 310        state->last = strtol(sb.buf, NULL, 10);
 311
 312        if (read_author_script(state) < 0)
 313                die(_("could not parse author script"));
 314
 315        read_commit_msg(state);
 316
 317        strbuf_release(&sb);
 318}
 319
 320/**
 321 * Removes the am_state directory, forcefully terminating the current am
 322 * session.
 323 */
 324static void am_destroy(const struct am_state *state)
 325{
 326        struct strbuf sb = STRBUF_INIT;
 327
 328        strbuf_addstr(&sb, state->dir);
 329        remove_dir_recursively(&sb, 0);
 330        strbuf_release(&sb);
 331}
 332
 333/**
 334 * Determines if the file looks like a piece of RFC2822 mail by grabbing all
 335 * non-indented lines and checking if they look like they begin with valid
 336 * header field names.
 337 *
 338 * Returns 1 if the file looks like a piece of mail, 0 otherwise.
 339 */
 340static int is_mail(FILE *fp)
 341{
 342        const char *header_regex = "^[!-9;-~]+:";
 343        struct strbuf sb = STRBUF_INIT;
 344        regex_t regex;
 345        int ret = 1;
 346
 347        if (fseek(fp, 0L, SEEK_SET))
 348                die_errno(_("fseek failed"));
 349
 350        if (regcomp(&regex, header_regex, REG_NOSUB | REG_EXTENDED))
 351                die("invalid pattern: %s", header_regex);
 352
 353        while (!strbuf_getline_crlf(&sb, fp)) {
 354                if (!sb.len)
 355                        break; /* End of header */
 356
 357                /* Ignore indented folded lines */
 358                if (*sb.buf == '\t' || *sb.buf == ' ')
 359                        continue;
 360
 361                /* It's a header if it matches header_regex */
 362                if (regexec(&regex, sb.buf, 0, NULL, 0)) {
 363                        ret = 0;
 364                        goto done;
 365                }
 366        }
 367
 368done:
 369        regfree(&regex);
 370        strbuf_release(&sb);
 371        return ret;
 372}
 373
 374/**
 375 * Attempts to detect the patch_format of the patches contained in `paths`,
 376 * returning the PATCH_FORMAT_* enum value. Returns PATCH_FORMAT_UNKNOWN if
 377 * detection fails.
 378 */
 379static int detect_patch_format(const char **paths)
 380{
 381        enum patch_format ret = PATCH_FORMAT_UNKNOWN;
 382        struct strbuf l1 = STRBUF_INIT;
 383        FILE *fp;
 384
 385        /*
 386         * We default to mbox format if input is from stdin and for directories
 387         */
 388        if (!*paths || !strcmp(*paths, "-") || is_directory(*paths))
 389                return PATCH_FORMAT_MBOX;
 390
 391        /*
 392         * Otherwise, check the first few lines of the first patch, starting
 393         * from the first non-blank line, to try to detect its format.
 394         */
 395
 396        fp = xfopen(*paths, "r");
 397
 398        while (!strbuf_getline_crlf(&l1, fp)) {
 399                if (l1.len)
 400                        break;
 401        }
 402
 403        if (starts_with(l1.buf, "From ") || starts_with(l1.buf, "From: ")) {
 404                ret = PATCH_FORMAT_MBOX;
 405                goto done;
 406        }
 407
 408        if (l1.len && is_mail(fp)) {
 409                ret = PATCH_FORMAT_MBOX;
 410                goto done;
 411        }
 412
 413done:
 414        fclose(fp);
 415        strbuf_release(&l1);
 416        return ret;
 417}
 418
 419/**
 420 * Splits out individual email patches from `paths`, where each path is either
 421 * a mbox file or a Maildir. Returns 0 on success, -1 on failure.
 422 */
 423static int split_mail_mbox(struct am_state *state, const char **paths)
 424{
 425        struct child_process cp = CHILD_PROCESS_INIT;
 426        struct strbuf last = STRBUF_INIT;
 427
 428        cp.git_cmd = 1;
 429        argv_array_push(&cp.args, "mailsplit");
 430        argv_array_pushf(&cp.args, "-d%d", state->prec);
 431        argv_array_pushf(&cp.args, "-o%s", state->dir);
 432        argv_array_push(&cp.args, "-b");
 433        argv_array_push(&cp.args, "--");
 434        argv_array_pushv(&cp.args, paths);
 435
 436        if (capture_command(&cp, &last, 8))
 437                return -1;
 438
 439        state->cur = 1;
 440        state->last = strtol(last.buf, NULL, 10);
 441
 442        return 0;
 443}
 444
 445/**
 446 * Splits a list of files/directories into individual email patches. Each path
 447 * in `paths` must be a file/directory that is formatted according to
 448 * `patch_format`.
 449 *
 450 * Once split out, the individual email patches will be stored in the state
 451 * directory, with each patch's filename being its index, padded to state->prec
 452 * digits.
 453 *
 454 * state->cur will be set to the index of the first mail, and state->last will
 455 * be set to the index of the last mail.
 456 *
 457 * Returns 0 on success, -1 on failure.
 458 */
 459static int split_mail(struct am_state *state, enum patch_format patch_format,
 460                        const char **paths)
 461{
 462        switch (patch_format) {
 463        case PATCH_FORMAT_MBOX:
 464                return split_mail_mbox(state, paths);
 465        default:
 466                die("BUG: invalid patch_format");
 467        }
 468        return -1;
 469}
 470
 471/**
 472 * Setup a new am session for applying patches
 473 */
 474static void am_setup(struct am_state *state, enum patch_format patch_format,
 475                        const char **paths)
 476{
 477        if (!patch_format)
 478                patch_format = detect_patch_format(paths);
 479
 480        if (!patch_format) {
 481                fprintf_ln(stderr, _("Patch format detection failed."));
 482                exit(128);
 483        }
 484
 485        if (mkdir(state->dir, 0777) < 0 && errno != EEXIST)
 486                die_errno(_("failed to create directory '%s'"), state->dir);
 487
 488        if (split_mail(state, patch_format, paths) < 0) {
 489                am_destroy(state);
 490                die(_("Failed to split patches."));
 491        }
 492
 493        /*
 494         * NOTE: Since the "next" and "last" files determine if an am_state
 495         * session is in progress, they should be written last.
 496         */
 497
 498        write_file(am_path(state, "next"), 1, "%d", state->cur);
 499
 500        write_file(am_path(state, "last"), 1, "%d", state->last);
 501}
 502
 503/**
 504 * Increments the patch pointer, and cleans am_state for the application of the
 505 * next patch.
 506 */
 507static void am_next(struct am_state *state)
 508{
 509        free(state->author_name);
 510        state->author_name = NULL;
 511
 512        free(state->author_email);
 513        state->author_email = NULL;
 514
 515        free(state->author_date);
 516        state->author_date = NULL;
 517
 518        free(state->msg);
 519        state->msg = NULL;
 520        state->msg_len = 0;
 521
 522        unlink(am_path(state, "author-script"));
 523        unlink(am_path(state, "final-commit"));
 524
 525        state->cur++;
 526        write_file(am_path(state, "next"), 1, "%d", state->cur);
 527}
 528
 529/**
 530 * Returns the filename of the current patch email.
 531 */
 532static const char *msgnum(const struct am_state *state)
 533{
 534        static struct strbuf sb = STRBUF_INIT;
 535
 536        strbuf_reset(&sb);
 537        strbuf_addf(&sb, "%0*d", state->prec, state->cur);
 538
 539        return sb.buf;
 540}
 541
 542/**
 543 * Parses `mail` using git-mailinfo, extracting its patch and authorship info.
 544 * state->msg will be set to the patch message. state->author_name,
 545 * state->author_email and state->author_date will be set to the patch author's
 546 * name, email and date respectively. The patch body will be written to the
 547 * state directory's "patch" file.
 548 *
 549 * Returns 1 if the patch should be skipped, 0 otherwise.
 550 */
 551static int parse_mail(struct am_state *state, const char *mail)
 552{
 553        FILE *fp;
 554        struct child_process cp = CHILD_PROCESS_INIT;
 555        struct strbuf sb = STRBUF_INIT;
 556        struct strbuf msg = STRBUF_INIT;
 557        struct strbuf author_name = STRBUF_INIT;
 558        struct strbuf author_date = STRBUF_INIT;
 559        struct strbuf author_email = STRBUF_INIT;
 560        int ret = 0;
 561
 562        cp.git_cmd = 1;
 563        cp.in = xopen(mail, O_RDONLY, 0);
 564        cp.out = xopen(am_path(state, "info"), O_WRONLY | O_CREAT, 0777);
 565
 566        argv_array_push(&cp.args, "mailinfo");
 567        argv_array_push(&cp.args, am_path(state, "msg"));
 568        argv_array_push(&cp.args, am_path(state, "patch"));
 569
 570        if (run_command(&cp) < 0)
 571                die("could not parse patch");
 572
 573        close(cp.in);
 574        close(cp.out);
 575
 576        /* Extract message and author information */
 577        fp = xfopen(am_path(state, "info"), "r");
 578        while (!strbuf_getline(&sb, fp, '\n')) {
 579                const char *x;
 580
 581                if (skip_prefix(sb.buf, "Subject: ", &x)) {
 582                        if (msg.len)
 583                                strbuf_addch(&msg, '\n');
 584                        strbuf_addstr(&msg, x);
 585                } else if (skip_prefix(sb.buf, "Author: ", &x))
 586                        strbuf_addstr(&author_name, x);
 587                else if (skip_prefix(sb.buf, "Email: ", &x))
 588                        strbuf_addstr(&author_email, x);
 589                else if (skip_prefix(sb.buf, "Date: ", &x))
 590                        strbuf_addstr(&author_date, x);
 591        }
 592        fclose(fp);
 593
 594        /* Skip pine's internal folder data */
 595        if (!strcmp(author_name.buf, "Mail System Internal Data")) {
 596                ret = 1;
 597                goto finish;
 598        }
 599
 600        if (is_empty_file(am_path(state, "patch"))) {
 601                printf_ln(_("Patch is empty. Was it split wrong?"));
 602                exit(128);
 603        }
 604
 605        strbuf_addstr(&msg, "\n\n");
 606        if (strbuf_read_file(&msg, am_path(state, "msg"), 0) < 0)
 607                die_errno(_("could not read '%s'"), am_path(state, "msg"));
 608        stripspace(&msg, 0);
 609
 610        assert(!state->author_name);
 611        state->author_name = strbuf_detach(&author_name, NULL);
 612
 613        assert(!state->author_email);
 614        state->author_email = strbuf_detach(&author_email, NULL);
 615
 616        assert(!state->author_date);
 617        state->author_date = strbuf_detach(&author_date, NULL);
 618
 619        assert(!state->msg);
 620        state->msg = strbuf_detach(&msg, &state->msg_len);
 621
 622finish:
 623        strbuf_release(&msg);
 624        strbuf_release(&author_date);
 625        strbuf_release(&author_email);
 626        strbuf_release(&author_name);
 627        strbuf_release(&sb);
 628        return ret;
 629}
 630
 631/**
 632 * Applies all queued mail.
 633 */
 634static void am_run(struct am_state *state)
 635{
 636        while (state->cur <= state->last) {
 637                const char *mail = am_path(state, msgnum(state));
 638
 639                if (!file_exists(mail))
 640                        goto next;
 641
 642                if (parse_mail(state, mail))
 643                        goto next; /* mail should be skipped */
 644
 645                write_author_script(state);
 646                write_commit_msg(state);
 647
 648                /* NEEDSWORK: Patch application not implemented yet */
 649
 650next:
 651                am_next(state);
 652        }
 653
 654        am_destroy(state);
 655}
 656
 657/**
 658 * parse_options() callback that validates and sets opt->value to the
 659 * PATCH_FORMAT_* enum value corresponding to `arg`.
 660 */
 661static int parse_opt_patchformat(const struct option *opt, const char *arg, int unset)
 662{
 663        int *opt_value = opt->value;
 664
 665        if (!strcmp(arg, "mbox"))
 666                *opt_value = PATCH_FORMAT_MBOX;
 667        else
 668                return error(_("Invalid value for --patch-format: %s"), arg);
 669        return 0;
 670}
 671
 672int cmd_am(int argc, const char **argv, const char *prefix)
 673{
 674        struct am_state state;
 675        int patch_format = PATCH_FORMAT_UNKNOWN;
 676
 677        const char * const usage[] = {
 678                N_("git am [options] [(<mbox>|<Maildir>)...]"),
 679                NULL
 680        };
 681
 682        struct option options[] = {
 683                OPT_CALLBACK(0, "patch-format", &patch_format, N_("format"),
 684                        N_("format the patch(es) are in"),
 685                        parse_opt_patchformat),
 686                OPT_END()
 687        };
 688
 689        /*
 690         * NEEDSWORK: Once all the features of git-am.sh have been
 691         * re-implemented in builtin/am.c, this preamble can be removed.
 692         */
 693        if (!getenv("_GIT_USE_BUILTIN_AM")) {
 694                const char *path = mkpath("%s/git-am", git_exec_path());
 695
 696                if (sane_execvp(path, (char **)argv) < 0)
 697                        die_errno("could not exec %s", path);
 698        } else {
 699                prefix = setup_git_directory();
 700                trace_repo_setup(prefix);
 701                setup_work_tree();
 702        }
 703
 704        git_config(git_default_config, NULL);
 705
 706        am_state_init(&state, git_path("rebase-apply"));
 707
 708        argc = parse_options(argc, argv, prefix, options, usage, 0);
 709
 710        if (am_in_progress(&state))
 711                am_load(&state);
 712        else {
 713                struct argv_array paths = ARGV_ARRAY_INIT;
 714                int i;
 715
 716                for (i = 0; i < argc; i++) {
 717                        if (is_absolute_path(argv[i]) || !prefix)
 718                                argv_array_push(&paths, argv[i]);
 719                        else
 720                                argv_array_push(&paths, mkpath("%s/%s", prefix, argv[i]));
 721                }
 722
 723                am_setup(&state, patch_format, paths.argv);
 724
 725                argv_array_clear(&paths);
 726        }
 727
 728        am_run(&state);
 729
 730        am_state_release(&state);
 731
 732        return 0;
 733}