diff-lib.c on commit "trace: measure where the time is spent in the index-heavy operations" (ca54d9b)
   1/*
   2 * Copyright (C) 2005 Junio C Hamano
   3 */
   4#include "cache.h"
   5#include "quote.h"
   6#include "commit.h"
   7#include "diff.h"
   8#include "diffcore.h"
   9#include "revision.h"
  10#include "cache-tree.h"
  11#include "unpack-trees.h"
  12#include "refs.h"
  13#include "submodule.h"
  14#include "dir.h"
  15#include "fsmonitor.h"
  16
  17/*
  18 * diff-files
  19 */
  20
  21/*
  22 * Has the work tree entity been removed?
  23 *
  24 * Return 1 if it was removed from the work tree, 0 if an entity to be
  25 * compared with the cache entry ce still exists (the latter includes
  26 * the case where a directory that is not a submodule repository
  27 * exists for ce that is a submodule -- it is a submodule that is not
  28 * checked out).  Return negative for an error.
  29 */
  30static int check_removed(const struct cache_entry *ce, struct stat *st)
  31{
  32        if (lstat(ce->name, st) < 0) {
  33                if (!is_missing_file_error(errno))
  34                        return -1;
  35                return 1;
  36        }
  37        if (has_symlink_leading_path(ce->name, ce_namelen(ce)))
  38                return 1;
  39        if (S_ISDIR(st->st_mode)) {
  40                struct object_id sub;
  41
  42                /*
  43                 * If ce is already a gitlink, we can have a plain
  44                 * directory (i.e. the submodule is not checked out),
  45                 * or a checked out submodule.  Either case this is not
  46                 * a case where something was removed from the work tree,
  47                 * so we will return 0.
  48                 *
  49                 * Otherwise, if the directory is not a submodule
  50                 * repository, that means ce which was a blob turned into
  51                 * a directory --- the blob was removed!
  52                 */
  53                if (!S_ISGITLINK(ce->ce_mode) &&
  54                    resolve_gitlink_ref(ce->name, "HEAD", &sub))
  55                        return 1;
  56        }
  57        return 0;
  58}
  59
  60/*
  61 * Has a file changed or has a submodule new commits or a dirty work tree?
  62 *
  63 * Return 1 when changes are detected, 0 otherwise. If the DIRTY_SUBMODULES
  64 * option is set, the caller does not only want to know if a submodule is
  65 * modified at all but wants to know all the conditions that are met (new
  66 * commits, untracked content and/or modified content).
  67 */
  68static int match_stat_with_submodule(struct diff_options *diffopt,
  69                                     const struct cache_entry *ce,
  70                                     struct stat *st, unsigned ce_option,
  71                                     unsigned *dirty_submodule)
  72{
  73        int changed = ce_match_stat(ce, st, ce_option);
  74        if (S_ISGITLINK(ce->ce_mode)) {
  75                struct diff_flags orig_flags = diffopt->flags;
  76                if (!diffopt->flags.override_submodule_config)
  77                        set_diffopt_flags_from_submodule_config(diffopt, ce->name);
  78                if (diffopt->flags.ignore_submodules)
  79                        changed = 0;
  80                else if (!diffopt->flags.ignore_dirty_submodules &&
  81                         (!changed || diffopt->flags.dirty_submodules))
  82                        *dirty_submodule = is_submodule_modified(ce->name,
  83                                                                 diffopt->flags.ignore_untracked_in_submodules);
  84                diffopt->flags = orig_flags;
  85        }
  86        return changed;
  87}
  88
  89int run_diff_files(struct rev_info *revs, unsigned int option)
  90{
  91        int entries, i;
  92        int diff_unmerged_stage = revs->max_count;
  93        unsigned ce_option = ((option & DIFF_RACY_IS_MODIFIED)
  94                              ? CE_MATCH_RACY_IS_DIRTY : 0);
  95        uint64_t start = getnanotime();
  96
  97        diff_set_mnemonic_prefix(&revs->diffopt, "i/", "w/");
  98
  99        if (diff_unmerged_stage < 0)
 100                diff_unmerged_stage = 2;
 101        entries = active_nr;
 102        for (i = 0; i < entries; i++) {
 103                unsigned int oldmode, newmode;
 104                struct cache_entry *ce = active_cache[i];
 105                int changed;
 106                unsigned dirty_submodule = 0;
 107                const struct object_id *old_oid, *new_oid;
 108
 109                if (diff_can_quit_early(&revs->diffopt))
 110                        break;
 111
 112                if (!ce_path_match(ce, &revs->prune_data, NULL))
 113                        continue;
 114
 115                if (ce_stage(ce)) {
 116                        struct combine_diff_path *dpath;
 117                        struct diff_filepair *pair;
 118                        unsigned int wt_mode = 0;
 119                        int num_compare_stages = 0;
 120                        size_t path_len;
 121                        struct stat st;
 122
 123                        path_len = ce_namelen(ce);
 124
 125                        dpath = xmalloc(combine_diff_path_size(5, path_len));
 126                        dpath->path = (char *) &(dpath->parent[5]);
 127
 128                        dpath->next = NULL;
 129                        memcpy(dpath->path, ce->name, path_len);
 130                        dpath->path[path_len] = '\0';
 131                        oidclr(&dpath->oid);
 132                        memset(&(dpath->parent[0]), 0,
 133                               sizeof(struct combine_diff_parent)*5);
 134
 135                        changed = check_removed(ce, &st);
 136                        if (!changed)
 137                                wt_mode = ce_mode_from_stat(ce, st.st_mode);
 138                        else {
 139                                if (changed < 0) {
 140                                        perror(ce->name);
 141                                        continue;
 142                                }
 143                                wt_mode = 0;
 144                        }
 145                        dpath->mode = wt_mode;
 146
 147                        while (i < entries) {
 148                                struct cache_entry *nce = active_cache[i];
 149                                int stage;
 150
 151                                if (strcmp(ce->name, nce->name))
 152                                        break;
 153
 154                                /* Stage #2 (ours) is the first parent,
 155                                 * stage #3 (theirs) is the second.
 156                                 */
 157                                stage = ce_stage(nce);
 158                                if (2 <= stage) {
 159                                        int mode = nce->ce_mode;
 160                                        num_compare_stages++;
 161                                        oidcpy(&dpath->parent[stage - 2].oid,
 162                                               &nce->oid);
 163                                        dpath->parent[stage-2].mode = ce_mode_from_stat(nce, mode);
 164                                        dpath->parent[stage-2].status =
 165                                                DIFF_STATUS_MODIFIED;
 166                                }
 167
 168                                /* diff against the proper unmerged stage */
 169                                if (stage == diff_unmerged_stage)
 170                                        ce = nce;
 171                                i++;
 172                        }
 173                        /*
 174                         * Compensate for loop update
 175                         */
 176                        i--;
 177
 178                        if (revs->combine_merges && num_compare_stages == 2) {
 179                                show_combined_diff(dpath, 2,
 180                                                   revs->dense_combined_merges,
 181                                                   revs);
 182                                free(dpath);
 183                                continue;
 184                        }
 185                        FREE_AND_NULL(dpath);
 186
 187                        /*
 188                         * Show the diff for the 'ce' if we found the one
 189                         * from the desired stage.
 190                         */
 191                        pair = diff_unmerge(&revs->diffopt, ce->name);
 192                        if (wt_mode)
 193                                pair->two->mode = wt_mode;
 194                        if (ce_stage(ce) != diff_unmerged_stage)
 195                                continue;
 196                }
 197
 198                if (ce_uptodate(ce) || ce_skip_worktree(ce))
 199                        continue;
 200
 201                /* If CE_VALID is set, don't look at workdir for file removal */
 202                if (ce->ce_flags & CE_VALID) {
 203                        changed = 0;
 204                        newmode = ce->ce_mode;
 205                } else {
 206                        struct stat st;
 207
 208                        changed = check_removed(ce, &st);
 209                        if (changed) {
 210                                if (changed < 0) {
 211                                        perror(ce->name);
 212                                        continue;
 213                                }
 214                                diff_addremove(&revs->diffopt, '-', ce->ce_mode,
 215                                               &ce->oid,
 216                                               !is_null_oid(&ce->oid),
 217                                               ce->name, 0);
 218                                continue;
 219                        } else if (revs->diffopt.ita_invisible_in_index &&
 220                                   ce_intent_to_add(ce)) {
 221                                diff_addremove(&revs->diffopt, '+', ce->ce_mode,
 222                                               the_hash_algo->empty_tree, 0,
 223                                               ce->name, 0);
 224                                continue;
 225                        }
 226
 227                        changed = match_stat_with_submodule(&revs->diffopt, ce, &st,
 228                                                            ce_option, &dirty_submodule);
 229                        newmode = ce_mode_from_stat(ce, st.st_mode);
 230                }
 231
 232                if (!changed && !dirty_submodule) {
 233                        ce_mark_uptodate(ce);
 234                        mark_fsmonitor_valid(ce);
 235                        if (!revs->diffopt.flags.find_copies_harder)
 236                                continue;
 237                }
 238                oldmode = ce->ce_mode;
 239                old_oid = &ce->oid;
 240                new_oid = changed ? &null_oid : &ce->oid;
 241                diff_change(&revs->diffopt, oldmode, newmode,
 242                            old_oid, new_oid,
 243                            !is_null_oid(old_oid),
 244                            !is_null_oid(new_oid),
 245                            ce->name, 0, dirty_submodule);
 246
 247        }
 248        diffcore_std(&revs->diffopt);
 249        diff_flush(&revs->diffopt);
 250        trace_performance_since(start, "diff-files");
 251        return 0;
 252}
 253
 254/*
 255 * diff-index
 256 */
 257
 258/* A file entry went away or appeared */
 259static void diff_index_show_file(struct rev_info *revs,
 260                                 const char *prefix,
 261                                 const struct cache_entry *ce,
 262                                 const struct object_id *oid, int oid_valid,
 263                                 unsigned int mode,
 264                                 unsigned dirty_submodule)
 265{
 266        diff_addremove(&revs->diffopt, prefix[0], mode,
 267                       oid, oid_valid, ce->name, dirty_submodule);
 268}
 269
 270static int get_stat_data(const struct cache_entry *ce,
 271                         const struct object_id **oidp,
 272                         unsigned int *modep,
 273                         int cached, int match_missing,
 274                         unsigned *dirty_submodule, struct diff_options *diffopt)
 275{
 276        const struct object_id *oid = &ce->oid;
 277        unsigned int mode = ce->ce_mode;
 278
 279        if (!cached && !ce_uptodate(ce)) {
 280                int changed;
 281                struct stat st;
 282                changed = check_removed(ce, &st);
 283                if (changed < 0)
 284                        return -1;
 285                else if (changed) {
 286                        if (match_missing) {
 287                                *oidp = oid;
 288                                *modep = mode;
 289                                return 0;
 290                        }
 291                        return -1;
 292                }
 293                changed = match_stat_with_submodule(diffopt, ce, &st,
 294                                                    0, dirty_submodule);
 295                if (changed) {
 296                        mode = ce_mode_from_stat(ce, st.st_mode);
 297                        oid = &null_oid;
 298                }
 299        }
 300
 301        *oidp = oid;
 302        *modep = mode;
 303        return 0;
 304}
 305
 306static void show_new_file(struct rev_info *revs,
 307                          const struct cache_entry *new,
 308                          int cached, int match_missing)
 309{
 310        const struct object_id *oid;
 311        unsigned int mode;
 312        unsigned dirty_submodule = 0;
 313
 314        /*
 315         * New file in the index: it might actually be different in
 316         * the working tree.
 317         */
 318        if (get_stat_data(new, &oid, &mode, cached, match_missing,
 319            &dirty_submodule, &revs->diffopt) < 0)
 320                return;
 321
 322        diff_index_show_file(revs, "+", new, oid, !is_null_oid(oid), mode, dirty_submodule);
 323}
 324
 325static int show_modified(struct rev_info *revs,
 326                         const struct cache_entry *old,
 327                         const struct cache_entry *new,
 328                         int report_missing,
 329                         int cached, int match_missing)
 330{
 331        unsigned int mode, oldmode;
 332        const struct object_id *oid;
 333        unsigned dirty_submodule = 0;
 334
 335        if (get_stat_data(new, &oid, &mode, cached, match_missing,
 336                          &dirty_submodule, &revs->diffopt) < 0) {
 337                if (report_missing)
 338                        diff_index_show_file(revs, "-", old,
 339                                             &old->oid, 1, old->ce_mode,
 340                                             0);
 341                return -1;
 342        }
 343
 344        if (revs->combine_merges && !cached &&
 345            (oidcmp(oid, &old->oid) || oidcmp(&old->oid, &new->oid))) {
 346                struct combine_diff_path *p;
 347                int pathlen = ce_namelen(new);
 348
 349                p = xmalloc(combine_diff_path_size(2, pathlen));
 350                p->path = (char *) &p->parent[2];
 351                p->next = NULL;
 352                memcpy(p->path, new->name, pathlen);
 353                p->path[pathlen] = 0;
 354                p->mode = mode;
 355                oidclr(&p->oid);
 356                memset(p->parent, 0, 2 * sizeof(struct combine_diff_parent));
 357                p->parent[0].status = DIFF_STATUS_MODIFIED;
 358                p->parent[0].mode = new->ce_mode;
 359                oidcpy(&p->parent[0].oid, &new->oid);
 360                p->parent[1].status = DIFF_STATUS_MODIFIED;
 361                p->parent[1].mode = old->ce_mode;
 362                oidcpy(&p->parent[1].oid, &old->oid);
 363                show_combined_diff(p, 2, revs->dense_combined_merges, revs);
 364                free(p);
 365                return 0;
 366        }
 367
 368        oldmode = old->ce_mode;
 369        if (mode == oldmode && !oidcmp(oid, &old->oid) && !dirty_submodule &&
 370            !revs->diffopt.flags.find_copies_harder)
 371                return 0;
 372
 373        diff_change(&revs->diffopt, oldmode, mode,
 374                    &old->oid, oid, 1, !is_null_oid(oid),
 375                    old->name, 0, dirty_submodule);
 376        return 0;
 377}
 378
 379/*
 380 * This gets a mix of an existing index and a tree, one pathname entry
 381 * at a time. The index entry may be a single stage-0 one, but it could
 382 * also be multiple unmerged entries (in which case idx_pos/idx_nr will
 383 * give you the position and number of entries in the index).
 384 */
 385static void do_oneway_diff(struct unpack_trees_options *o,
 386                           const struct cache_entry *idx,
 387                           const struct cache_entry *tree)
 388{
 389        struct rev_info *revs = o->unpack_data;
 390        int match_missing, cached;
 391
 392        /* i-t-a entries do not actually exist in the index */
 393        if (revs->diffopt.ita_invisible_in_index &&
 394            idx && ce_intent_to_add(idx)) {
 395                idx = NULL;
 396                if (!tree)
 397                        return; /* nothing to diff.. */
 398        }
 399
 400        /* if the entry is not checked out, don't examine work tree */
 401        cached = o->index_only ||
 402                (idx && ((idx->ce_flags & CE_VALID) || ce_skip_worktree(idx)));
 403        /*
 404         * Backward compatibility wart - "diff-index -m" does
 405         * not mean "do not ignore merges", but "match_missing".
 406         *
 407         * But with the revision flag parsing, that's found in
 408         * "!revs->ignore_merges".
 409         */
 410        match_missing = !revs->ignore_merges;
 411
 412        if (cached && idx && ce_stage(idx)) {
 413                struct diff_filepair *pair;
 414                pair = diff_unmerge(&revs->diffopt, idx->name);
 415                if (tree)
 416                        fill_filespec(pair->one, &tree->oid, 1,
 417                                      tree->ce_mode);
 418                return;
 419        }
 420
 421        /*
 422         * Something added to the tree?
 423         */
 424        if (!tree) {
 425                show_new_file(revs, idx, cached, match_missing);
 426                return;
 427        }
 428
 429        /*
 430         * Something removed from the tree?
 431         */
 432        if (!idx) {
 433                diff_index_show_file(revs, "-", tree, &tree->oid, 1,
 434                                     tree->ce_mode, 0);
 435                return;
 436        }
 437
 438        /* Show difference between old and new */
 439        show_modified(revs, tree, idx, 1, cached, match_missing);
 440}
 441
 442/*
 443 * The unpack_trees() interface is designed for merging, so
 444 * the different source entries are designed primarily for
 445 * the source trees, with the old index being really mainly
 446 * used for being replaced by the result.
 447 *
 448 * For diffing, the index is more important, and we only have a
 449 * single tree.
 450 *
 451 * We're supposed to advance o->pos to skip what we have already processed.
 452 *
 453 * This wrapper makes it all more readable, and takes care of all
 454 * the fairly complex unpack_trees() semantic requirements, including
 455 * the skipping, the path matching, the type conflict cases etc.
 456 */
 457static int oneway_diff(const struct cache_entry * const *src,
 458                       struct unpack_trees_options *o)
 459{
 460        const struct cache_entry *idx = src[0];
 461        const struct cache_entry *tree = src[1];
 462        struct rev_info *revs = o->unpack_data;
 463
 464        /*
 465         * Unpack-trees generates a DF/conflict entry if
 466         * there was a directory in the index and a tree
 467         * in the tree. From a diff standpoint, that's a
 468         * delete of the tree and a create of the file.
 469         */
 470        if (tree == o->df_conflict_entry)
 471                tree = NULL;
 472
 473        if (ce_path_match(idx ? idx : tree, &revs->prune_data, NULL)) {
 474                do_oneway_diff(o, idx, tree);
 475                if (diff_can_quit_early(&revs->diffopt)) {
 476                        o->exiting_early = 1;
 477                        return -1;
 478                }
 479        }
 480
 481        return 0;
 482}
 483
 484static int diff_cache(struct rev_info *revs,
 485                      const struct object_id *tree_oid,
 486                      const char *tree_name,
 487                      int cached)
 488{
 489        struct tree *tree;
 490        struct tree_desc t;
 491        struct unpack_trees_options opts;
 492
 493        tree = parse_tree_indirect(tree_oid);
 494        if (!tree)
 495                return error("bad tree object %s",
 496                             tree_name ? tree_name : oid_to_hex(tree_oid));
 497        memset(&opts, 0, sizeof(opts));
 498        opts.head_idx = 1;
 499        opts.index_only = cached;
 500        opts.diff_index_cached = (cached &&
 501                                  !revs->diffopt.flags.find_copies_harder);
 502        opts.merge = 1;
 503        opts.fn = oneway_diff;
 504        opts.unpack_data = revs;
 505        opts.src_index = &the_index;
 506        opts.dst_index = NULL;
 507        opts.pathspec = &revs->diffopt.pathspec;
 508        opts.pathspec->recursive = 1;
 509
 510        init_tree_desc(&t, tree->buffer, tree->size);
 511        return unpack_trees(1, &t, &opts);
 512}
 513
 514int run_diff_index(struct rev_info *revs, int cached)
 515{
 516        struct object_array_entry *ent;
 517        uint64_t start = getnanotime();
 518
 519        ent = revs->pending.objects;
 520        if (diff_cache(revs, &ent->item->oid, ent->name, cached))
 521                exit(128);
 522
 523        diff_set_mnemonic_prefix(&revs->diffopt, "c/", cached ? "i/" : "w/");
 524        diffcore_fix_diff_index(&revs->diffopt);
 525        diffcore_std(&revs->diffopt);
 526        diff_flush(&revs->diffopt);
 527        trace_performance_since(start, "diff-index");
 528        return 0;
 529}
 530
 531int do_diff_cache(const struct object_id *tree_oid, struct diff_options *opt)
 532{
 533        struct rev_info revs;
 534
 535        init_revisions(&revs, NULL);
 536        copy_pathspec(&revs.prune_data, &opt->pathspec);
 537        revs.diffopt = *opt;
 538
 539        if (diff_cache(&revs, tree_oid, NULL, 1))
 540                exit(128);
 541        return 0;
 542}
 543
 544int index_differs_from(const char *def, const struct diff_flags *flags,
 545                       int ita_invisible_in_index)
 546{
 547        struct rev_info rev;
 548        struct setup_revision_opt opt;
 549
 550        init_revisions(&rev, NULL);
 551        memset(&opt, 0, sizeof(opt));
 552        opt.def = def;
 553        setup_revisions(0, NULL, &rev, &opt);
 554        rev.diffopt.flags.quick = 1;
 555        rev.diffopt.flags.exit_with_status = 1;
 556        if (flags)
 557                diff_flags_or(&rev.diffopt.flags, flags);
 558        rev.diffopt.ita_invisible_in_index = ita_invisible_in_index;
 559        run_diff_index(&rev, 1);
 560        object_array_clear(&rev.pending);
 561        return (rev.diffopt.flags.has_changes != 0);
 562}