logparse / parsers / cron.pyon commit rename parsers, better journald integration (e1f7605)
   1"""
   2Get information about executed cron commands - number of commands, list
   3of commands, and list of commands per user. Uses either journald or plain
   4logfiles (path specified in config).
   5
   6NOTE: This parser supports reading from both journald and plain syslog files. 
   7By default the plain logfiles will be used, but the journald option is 
   8preferred for newer systems which support it. To use the journald mode, 
   9specify the parser as `cron_journald` instead of `cron`.
  10
  11NOTE: If using journald, the log level for cron.service should be at least 2 
  12(default is 1). This can be changed with `sudo systemctl edit cron --full`, 
   13and append `-L 2` to the ExecStart command.
  14
  15TODO: also output a list of scheduled (future) jobs
  16"""
  17
  18import datetime
  19import re
  20
  21from logparse.formatting import *
  22from logparse.util import readlog
  23from logparse import config
  24from logparse.load_parsers import Parser
  25
  26class CronCommand:
  27    """
  28    Class representing a single cron session. Assigns its own variables of 
  29    date, user and cmd when given a `systemd.journal.Record` object or a plain 
  30    log message string on initialisation. NOTE: This class is used in both
  31    `cron.py` and `cron_journald.py`.
  32    """
  33
  34    def __init__(self, record, datefmt=""):
  35        """
  36        Parse the date, user and command from the logfile string or record
  37        """
  38        if isinstance(record, str):
  39            if not datefmt:
  40                logger.error("Date format not provided - cannot parse this "
  41                        "log message")
  42            # Parse from a raw logfile string
  43            self.date, self.user, self.cmd = re.search(
  44                    r"^(?P<time>.+)\s\w+\sCRON"
  45            "\[\d+\]:\s\((?P<user>\S*)\)\sCMD\s\(+(\[\d+\]\s)?(?P<cmd>.*)\)+",
  46                    record).groupdict().values()
  47            self.date = datetime.datetime.strptime(self.date, datefmt)
  48            if not "Y" in datefmt:
  49                self.date = self.date.replace(year=datetime.datetime.now().year)
  50        elif isinstance(record, dict):
  51            self.date = record["_SOURCE_REALTIME_TIMESTAMP"]
  52            self.user, self.cmd = re.search(r"\((?P<user>\S+)\) "
  53                    "CMD \((\[\d+\] )?(?P<cmd>.*)\)", record["MESSAGE"]) \
  54                    .groupdict().values()
  55            self.cmd = " ".join(self.cmd.split())
  56        else:
  57            raise TypeError("record should be str or dict")
  58
  59    def truncate(self):
  60        """
  61        Hide the full directory path for any scripts or explicit binary
  62        references in the command. e.g. `/usr/bin/cat` → `cat`
  63        """
  64        self.cmd = re.sub(r"(\s|^)/\S*/(\S+)", r"\1\2", self.cmd)
  65        return self.cmd
  66
  67    def match_user(self, pattern):
  68        """
  69        Check if the user of this object matches against a regex string and 
  70        return a boolean result of this comparison.
  71        """
  72        user_match = False
  73        for p in pattern:
  74            user_match = re.fullmatch(p, self.user) \
  75                or user_match
  76        return user_match
  77
  78    def match_cmd(self, pattern):
  79        """
  80        Check if the command of this object matches against a regex string and 
  81        return a boolean result of this comparison.
  82        """
  83        cmd_match = False
  84        for p in pattern:
  85            cmd_match = re.fullmatch(p, self.user) \
  86                or cmd_match
  87        return cmd_match 
  88
  89
  90class Cron(Parser):
  91
  92    def __init__(self):
  93        super().__init__()
  94        self.name = "cron"
  95        self.info = "List the logged (executed) cron jobs and their commands"
  96        self.journald = False
  97
  98    def _get_journald(self, startdate):
  99        from systemd import journal
 100        j = journal.Reader()
 101        j.this_machine()
 102        j.log_level(journal.LOG_INFO)
 103        j.add_match(_COMM="cron")
 104        j.seek_realtime(startdate)
 105        return [entry for entry in j if "MESSAGE" in entry 
 106                and " CMD " in entry["MESSAGE"]]
 107
 108    def _get_logfile(self, path):
 109        from logparse.util import readlog
 110        return [x for x in readlog(path).splitlines() if " CMD " in x] 
 111
 112    def parse_log(self):
 113
 114        logger.debug("Starting cron section")
 115        section = Section("cron")
 116
 117        if not (config.prefs.getboolean("cron", "summary") 
 118                or config.prefs.getboolean("cron", "list-users")):
 119            logger.warning("Both summary and list-users configuration options "
 120                "are set to false, so no output will be generated. "
 121                "Skipping this parser.")
 122            return None
 123
 124        datefmt = config.prefs.get("cron", "datetime-format")
 125        if not datefmt:
 126            datefmt = config.prefs.get("logparse", "datetime-format")
 127        if not datefmt:
 128            logger.error("Invalid datetime-format configuration parameter")
 129            return None
 130
 131        command_objects = []
 132        users = {}
 133        oldlog_buffer = 0
 134
 135        if self.journald:
 136            logger.debug("Searching for cron commands in journald")
 137            messages = self._get_journald(section.period.startdate)
 138        else:
 139            logger.debug("Searching for matches in {0}".format(
 140                config.prefs.get("logs", "cron")))
 141            messages = self._get_logfile(config.prefs.get("logs", "cron"))
 142
 143        if len(messages) < 1:
 144            logger.error("Couldn't find any cron log messages")
 145            return
 146
 147        for msg in messages:
 148
 149            try:
 150                cmd_obj = CronCommand(msg, datefmt)
 151            except Exception as e:
 152                logger.warning("Malformed cron session log: {0}. "
 153                    "Error message: {1}".format(msg, str(e)))
 154                continue
 155            else:
 156                if cmd_obj.date < section.period.startdate:
 157                    continue
 158                if not (cmd_obj.match_user(config.prefs.get("cron", "users")
 159                    .split()) and cmd_obj.match_cmd(config.prefs.get(
 160                        "cron", "commands").split())):
 161                    logger.debug("Ignoring cron session by {0} with command "
 162                        "{1} due to config".format(cmd_obj.user, cmd_obj.cmd))
 163                    continue
 164
 165            if config.prefs.getboolean("cron", "truncate-commands"):
 166                cmd_obj.truncate()
 167
 168            command_objects.append(cmd_obj)
 169            if not cmd_obj.user in users:
 170                users[cmd_obj.user] = []
 171            users[cmd_obj.user].append(cmd_obj.cmd)
 172
 173        if len(command_objects) == 0:
 174            logger.error("No valid cron commands found")
 175            return
 176
 177        logger.info("Found {0} cron jobs".format(len(command_objects)))
 178
 179        if config.prefs.getboolean("cron", "summary"):
 180            summary_data = Data()
 181            summary_data.subtitle = "Total of " + plural("cron session",
 182                    len(command_objects)) + " for " + plural("user",
 183                            len(users))
 184            summary_data.items = ["{}: `{}`".format(c.user, c.cmd)
 185                    for c in command_objects]
 186            summary_data.orderbyfreq()
 187            summary_data.truncl(config.prefs.getint("logparse", "maxcmd"))
 188            section.append_data(summary_data)
 189
 190        if config.prefs.getboolean("cron", "list-users"):
 191            for user, cmdlist in users.items():
 192                user_data = Data()
 193                user_data.subtitle = plural("session", len(cmdlist)) \
 194                        + " for " + user + (" (" + plural("unique command",
 195                            len(set(cmdlist))) + ")" if len(set(cmdlist)) > 1
 196                            else "")
 197                user_data.items = ["`{}`".format(cmd) for cmd in cmdlist]
 198                user_data.orderbyfreq()
 199                user_data.truncl(config.prefs.getint("logparse", "maxcmd"))
 200                section.append_data(user_data)
 201
 202        logger.info("Finished cron section")
 203        return section