t / t0212 / parse_events.perlon commit hash: add a function to lookup hash algorithm by length (9539978)
   1#!/usr/bin/perl
   2#
   3# Parse event stream and convert individual events into a summary
   4# record for the process.
   5#
   6# Git.exe generates one or more "event" records for each API method,
   7# such as "start <argv>" and "exit <code>", during the life of the git
   8# process.  Additionally, the input may contain interleaved events
   9# from multiple concurrent git processes and/or multiple threads from
  10# within a git process.
  11#
  12# Accumulate events for each process (based on its unique SID) in a
  13# dictionary and emit process summary records.
  14#
  15# Convert some of the variable fields (such as elapsed time) into
  16# placeholders (or omit them) to make HEREDOC comparisons easier in
  17# the test scripts.
  18#
  19# We may also omit fields not (currently) useful for testing purposes.
  20
  21use strict;
  22use warnings;
  23use JSON::PP;
  24use Data::Dumper;
  25use Getopt::Long;
  26
  27# The version of the trace2 event target format that we understand.
  28# This is reported in the 'version' event in the 'evt' field.
  29# It comes from the GIT_TR2_EVENT_VERSION macro in trace2/tr2_tgt_event.c
  30my $evt_version = '1';
  31
  32my $show_children = 1;
  33my $show_exec     = 1;
  34my $show_threads  = 1;
  35
  36# A hack to generate test HEREDOC data for pasting into the test script.
  37# Usage:
  38#    cd "t/trash directory.t0212-trace2-event"
  39#    $TT trace ... >trace.event
  40#    VV=$(../../git.exe version | sed -e 's/^git version //')
  41#    perl ../t0212/parse_events.perl --HEREDOC --VERSION=$VV <trace.event >heredoc
  42# Then paste heredoc into your new test.
  43
  44my $gen_heredoc = 0;
  45my $gen_version = '';
  46
  47GetOptions("children!" => \$show_children,
  48           "exec!"     => \$show_exec,
  49           "threads!"  => \$show_threads,
  50           "HEREDOC!"  => \$gen_heredoc,
  51           "VERSION=s" => \$gen_version    )
  52    or die("Error in command line arguments\n");
  53
  54
  55# SIDs contains timestamps and PIDs of the process and its parents.
  56# This makes it difficult to match up in a HEREDOC in the test script.
  57# Build a map from actual SIDs to predictable constant values and yet
  58# keep the parent/child relationships.  For example:
  59# {..., "sid":"1539706952458276-8652", ...}
  60# {..., "sid":"1539706952458276-8652/1539706952649493-15452", ...}
  61# becomes:
  62# {..., "sid":"_SID1_", ...}
  63# {..., "sid":"_SID1_/_SID2_", ...}
  64my $sid_map;
  65my $sid_count = 0;
  66
  67my $processes;
  68
  69while (<>) {
  70    my $line = decode_json( $_ );
  71
  72    my $sid = "";
  73    my $sid_sep = "";
  74
  75    my $raw_sid = $line->{'sid'};
  76    my @raw_sid_parts = split /\//, $raw_sid;
  77    foreach my $raw_sid_k (@raw_sid_parts) {
  78        if (!exists $sid_map->{$raw_sid_k}) {
  79            $sid_map->{$raw_sid_k} = '_SID' . $sid_count . '_';
  80            $sid_count++;
  81        }
  82        $sid = $sid . $sid_sep . $sid_map->{$raw_sid_k};
  83        $sid_sep = '/';
  84    }
  85    
  86    my $event = $line->{'event'};
  87
  88    if ($event eq 'version') {
  89        $processes->{$sid}->{'version'} = $line->{'exe'};
  90        if ($gen_heredoc == 1 && $gen_version eq $line->{'exe'}) {
  91            # If we are generating data FOR the test script, replace
  92            # the reported git.exe version with a reference to an
  93            # environment variable.  When our output is pasted into
  94            # the test script, it will then be expanded in future
  95            # test runs to the THEN current version of git.exe.
  96            # We assume that the test script uses env var $V.
  97            $processes->{$sid}->{'version'} = "\$V";
  98        }
  99    }
 100
 101    elsif ($event eq 'start') {
 102        $processes->{$sid}->{'argv'} = $line->{'argv'};
 103        $processes->{$sid}->{'argv'}[0] = "_EXE_";
 104    }
 105
 106    elsif ($event eq 'exit') {
 107        $processes->{$sid}->{'exit_code'} = $line->{'code'};
 108    }
 109
 110    elsif ($event eq 'atexit') {
 111        $processes->{$sid}->{'exit_code'} = $line->{'code'};
 112    }
 113
 114    elsif ($event eq 'error') {
 115        # For HEREDOC purposes, use the error message format string if
 116        # available, rather than the formatted message (which probably
 117        # has an absolute pathname).
 118        if (exists $line->{'fmt'}) {
 119            push( @{$processes->{$sid}->{'errors'}}, $line->{'fmt'} );
 120        }
 121        elsif (exists $line->{'msg'}) {
 122            push( @{$processes->{$sid}->{'errors'}}, $line->{'msg'} );
 123        }
 124    }
 125
 126    elsif ($event eq 'cmd_path') {
 127        ## $processes->{$sid}->{'path'} = $line->{'path'};
 128        #
 129        # Like in the 'start' event, we need to replace the value of
 130        # argv[0] with a token for HEREDOC purposes.  However, the
 131        # event is only emitted when RUNTIME_PREFIX is defined, so
 132        # just omit it for testing purposes.
 133        # $processes->{$sid}->{'path'} = "_EXE_";
 134    }
 135    
 136    elsif ($event eq 'cmd_name') {
 137        $processes->{$sid}->{'name'} = $line->{'name'};
 138        $processes->{$sid}->{'hierarchy'} = $line->{'hierarchy'};
 139    }
 140
 141    elsif ($event eq 'alias') {
 142        $processes->{$sid}->{'alias'}->{'key'} = $line->{'alias'};
 143        $processes->{$sid}->{'alias'}->{'argv'} = $line->{'argv'};
 144    }
 145
 146    elsif ($event eq 'def_param') {
 147        my $kv;
 148        $kv->{'param'} = $line->{'param'};
 149        $kv->{'value'} = $line->{'value'};
 150        push( @{$processes->{$sid}->{'params'}}, $kv );
 151    }
 152
 153    elsif ($event eq 'child_start') {
 154        if ($show_children == 1) {
 155            $processes->{$sid}->{'child'}->{$line->{'child_id'}}->{'child_class'} = $line->{'child_class'};
 156            $processes->{$sid}->{'child'}->{$line->{'child_id'}}->{'child_argv'} = $line->{'argv'};
 157            $processes->{$sid}->{'child'}->{$line->{'child_id'}}->{'child_argv'}[0] = "_EXE_";
 158            $processes->{$sid}->{'child'}->{$line->{'child_id'}}->{'use_shell'} = $line->{'use_shell'} ? 1 : 0;
 159        }
 160    }
 161
 162    elsif ($event eq 'child_exit') {
 163        if ($show_children == 1) {
 164            $processes->{$sid}->{'child'}->{$line->{'child_id'}}->{'child_code'} = $line->{'code'};
 165        }
 166    }
 167
 168    # TODO decide what information we want to test from thread events.
 169
 170    elsif ($event eq 'thread_start') {
 171        if ($show_threads == 1) {
 172        }
 173    }
 174
 175    elsif ($event eq 'thread_exit') {
 176        if ($show_threads == 1) {
 177        }
 178    }
 179
 180    # TODO decide what information we want to test from exec events.
 181
 182    elsif ($event eq 'exec') {
 183        if ($show_exec == 1) {
 184        }
 185    }
 186
 187    elsif ($event eq 'exec_result') {
 188        if ($show_exec == 1) {
 189        }
 190    }
 191
 192    elsif ($event eq 'def_param') {
 193        # Accumulate parameter key/value pairs by key rather than in an array
 194        # so that we get overwrite (last one wins) effects.
 195        $processes->{$sid}->{'params'}->{$line->{'param'}} = $line->{'value'};
 196    }
 197
 198    elsif ($event eq 'def_repo') {
 199        # $processes->{$sid}->{'repos'}->{$line->{'repo'}} = $line->{'worktree'};
 200        $processes->{$sid}->{'repos'}->{$line->{'repo'}} = "_WORKTREE_";
 201    }
 202
 203    # A series of potentially nested and threaded region and data events
 204    # is fundamentally incompatibile with the type of summary record we
 205    # are building in this script.  Since they are intended for
 206    # perf-trace-like analysis rather than a result summary, we ignore
 207    # most of them here.
 208
 209    # elsif ($event eq 'region_enter') {
 210    # }
 211    # elsif ($event eq 'region_leave') {
 212    # }
 213
 214    elsif ($event eq 'data') {
 215        my $cat = $line->{'category'};
 216        if ($cat eq 'test_category') {
 217            
 218            my $key = $line->{'key'};
 219            my $value = $line->{'value'};
 220            $processes->{$sid}->{'data'}->{$cat}->{$key} = $value;
 221        }
 222    }
 223
 224    # This trace2 target does not emit 'printf' events.
 225    #
 226    # elsif ($event eq 'printf') {
 227    # }
 228}
 229
 230# Dump the resulting hash into something that we can compare against
 231# in the test script.  These options make Dumper output look a little
 232# bit like JSON.  Also convert variable references of the form "$VAR*"
 233# so that the matching HEREDOC doesn't need to escape it.
 234
 235$Data::Dumper::Sortkeys = 1;
 236$Data::Dumper::Indent = 1;
 237$Data::Dumper::Purity = 1;
 238$Data::Dumper::Pair = ':';
 239
 240my $out = Dumper($processes);
 241$out =~ s/'/"/g;
 242$out =~ s/\$VAR/VAR/g;
 243
 244# Finally, if we're running this script to generate (manually confirmed)
 245# data to add to the test script, guard the indentation.
 246
 247if ($gen_heredoc == 1) {
 248    $out =~ s/^/\t\|/gms;
 249}
 250
 251print $out;