From 7351cf6ee6d6fabac7f328dc5020ada67554636f Mon Sep 17 00:00:00 2001
From: Andrew Lorimer
Date: Fri, 30 Aug 2019 22:05:38 +1000
Subject: [PATCH] add journald communication capability

---
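Notes for reviewers: both new *-journald parsers share the same
python-systemd access pattern, sketched below with the field names used in
this patch (_COMM is the originating process name, MESSAGE the log text):

    from systemd import journal

    j = journal.Reader()
    j.this_boot()                 # restrict to entries from the current boot
    j.add_match(_COMM="sshd")     # filter by the daemon that logged the entry
    messages = [entry["MESSAGE"] for entry in j if "MESSAGE" in entry]
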
 logparse/config.py                |   2 +-
 logparse/formatting.py            |  21 ++++--
 logparse/parsers/cron-journald.py |  66 +++++++++++++++++
 logparse/parsers/cron.py          |  17 +++--
 logparse/parsers/httpd.py         | 112 ++++++++++++++------------
 logparse/parsers/journaltest.py   |  11 +++
 logparse/parsers/load_parsers.py  |   6 +-
 logparse/parsers/mem.py           |  20 ++---
 logparse/parsers/sshd-journald.py |  81 ++++++++++++++++++++
 logparse/parsers/sshd.py          |  49 ++++++++++---
 logparse/util.py                  |   4 +-
 11 files changed, 307 insertions(+), 82 deletions(-)
 create mode 100644 logparse/parsers/cron-journald.py
 create mode 100644 logparse/parsers/journaltest.py
 create mode 100644 logparse/parsers/sshd-journald.py

diff --git a/logparse/config.py b/logparse/config.py
index 442de85..c7455bd 100644
--- a/logparse/config.py
+++ b/logparse/config.py
@@ -47,7 +47,7 @@ defaults = Configuration({
     'overwrite': False,
     'title': logparse.__name__,
     'maxlist': 10,
-    'maxcmd': 3,
+    'maxcmd': 6,
     'resolve-domains': 'fqdn',
     'mail': {
         'to': '',
diff --git a/logparse/formatting.py b/logparse/formatting.py
index c7b7c30..b303449 100644
--- a/logparse/formatting.py
+++ b/logparse/formatting.py
@@ -265,9 +265,12 @@ class Data:
         Truncate self.items to a specified value and state how many items are hidden.
         """
         if (len(self.items) > limit):
-            more = str(len(self.items) - limit)
+            more = len(self.items) - limit
+            if more == 1:
+                # "+ 1 more" would take the same space as the one hidden item
+                return 0
             self.items = self.items[:limit]
-            self.items.append("+ {0} more".format(more))
+            self.items.append("+ {0} more".format(str(more)))
 
     def orderbyfreq(self):
         """
@@ -587,14 +590,20 @@ def backticks(l):
     return ["`" + x + "`" for x in l]
 
 
-def plural(noun, quantity):
+def plural(noun, quantity, print_quantity=True):
     """
     Return "1 noun" or "n nouns"
     """
-    if (quantity == 1):
-        return(str(quantity) + " " + noun)
+    if print_quantity:
+        if (quantity == 1):
+            return(str(quantity) + " " + noun)
+        else:
+            return(str(quantity) + " " + noun + "s")
     else:
-        return(str(quantity) + " " + noun + "s")
+        if (quantity == 1):
+            return noun
+        else:
+            return noun + "s"
 
 
 def parsesize(num, suffix='B'):
diff --git a/logparse/parsers/cron-journald.py b/logparse/parsers/cron-journald.py
new file mode 100644
index 0000000..9df7e2b
--- /dev/null
+++ b/logparse/parsers/cron-journald.py
@@ -0,0 +1,66 @@
+#
+# cron-journald.py
+#
+# List the logged (executed) cron jobs and their commands (uses journald module)
+#
+# TODO: also output a list of scheduled (future) jobs
+#
+
+import re
+from systemd import journal
+
+from logparse.formatting import *
+from logparse import config
+
+import logging
+logger = logging.getLogger(__name__)
+
+def parse_log():
+
+    logger.debug("Starting cron section")
+    section = Section("cron")
+
+    # Initiate journald reader
+    j = journal.Reader()
+    j.this_boot()
+    j.this_machine()
+    j.log_level(journal.LOG_INFO)
+    j.add_match(_COMM="cron")
+
+    logger.info("Obtaining cron logs")
+
+    messages = [entry["MESSAGE"] for entry in j if "MESSAGE" in entry and " CMD " in entry["MESSAGE"]]
+
+    total_jobs = len(messages)
+
+    if total_jobs == 0:
+        logger.warning("Couldn't find any cron commands")
+        return 1
+
+    logger.info("Found " + str(total_jobs) + " cron jobs")
+    section.append_data(Data("Total of " + plural("cron session", total_jobs) + " executed across all users"))
+
+    logger.debug("Analysing cron commands for each user")
+    users = {}
+
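+    # A cron journal MESSAGE typically looks like this (assumed example):
+    #     (root) CMD (   cd / && run-parts --report /etc/cron.hourly)
+    # so the regex below captures the user and the command.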
+    for msg in messages:
+        usr_cmd = re.search('\((\S+)\) CMD (.*)', msg) # [('user', 'cmd')]
+        if usr_cmd:
+            if not usr_cmd.group(1) in users:
+                users[usr_cmd.group(1)] = []
+            users[usr_cmd.group(1)].append(usr_cmd.group(2))
+
+    for usr, cmdlist in users.items():
+        user_data = Data()
+        user_data.subtitle = plural("cron session", len(cmdlist)) + " for " + usr
+        user_data.items = ("`{0}`".format(cmd) for cmd in cmdlist)
+        user_data.orderbyfreq()
+        user_data.truncl(config.prefs['maxcmd'])
+        section.append_data(user_data)
+
+    logger.info("Finished cron section")
+
+    return section
diff --git a/logparse/parsers/cron.py b/logparse/parsers/cron.py
index 01a6135..4408ba2 100644
--- a/logparse/parsers/cron.py
+++ b/logparse/parsers/cron.py
@@ -1,23 +1,32 @@
 #
 # cron.py
 #
-# List the logged (executed) cron jobs and their commands
-# TODO: also output a list of scheduled (future) jobs
+# List the logged (executed) cron jobs and their commands (uses syslog file)
+#
+# NOTE: This file is now deprecated in favour of the newer journald mechanism
+# used in cron-journald.py. This parser is still functional but is slower and
+# has fewer features. Please switch over if possible.
+#
 
 import re
 
 from ..formatting import *
 from ..util import readlog, resolve
 from .. import config
+from .. import util
 
 import logging
 logger = logging.getLogger(__name__)
 
 def parse_log():
+
+    logger.warning("NOTE: This cron parser is now deprecated. Please use cron-journald if possible.")
+
     logger.debug("Starting cron section")
     section = Section("cron")
+
     matches = re.findall('.*CMD\s*\(\s*(?!.*cd)(.*)\)', readlog(config.prefs['logs']['cron']))
-    num = sum(1 for line in matches)
+    num = len(matches)
     commands = []
     for match in matches:
         commands.append(str(match))
@@ -27,7 +36,7 @@ def parse_log():
     jobs_data = Data(str(num) + " cron jobs run")
     section.append_data(jobs_data)
 
-    if (len(matches) > 0):
+    if (num > 0):
         logger.debug("Analysing cron commands")
         cmd_data = Data("Top cron commands")
         cmd_data.items = ("`{0}`".format(x) for x in commands)
diff --git a/logparse/parsers/httpd.py b/logparse/parsers/httpd.py
index d0e800c..518af7a 100644
--- a/logparse/parsers/httpd.py
+++ b/logparse/parsers/httpd.py
@@ -17,66 +17,78 @@
 from .. import config
 
 import logging
 logger = logging.getLogger(__name__)
 
+ACCESS_REGEX = "^\s*(\S+).*\"GET (\S+) HTTP(?:\/\d\.\d)?\" (\d{3}) (\d*) \".+\" \"(.*)\""
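+# e.g. matches an access log line of the form (combined log format assumed):
+#   203.0.113.5 - - [30/Aug/2019:21:14:09 +1000] "GET /index.html HTTP/1.1" 200 4523 "-" "Mozilla/5.0 ..."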
+
+class AccessLine(object):
+
+    def __init__(self, line):
+        self.line = line
+        fields = re.search(ACCESS_REGEX, line)
+
+        self.client = fields.group(1)
+        self.file = fields.group(2)
+        self.statuscode = int(fields.group(3))
+        self.bytes = int(fields.group(4))
+        self.useragent = fields.group(5)
+
 def parse_log():
+
     logger.debug("Starting httpd section")
     section = Section("httpd")
+
     accesslog = readlog(config.prefs['logs']['httpd'] + '/access.log')
-    a = len(accesslog.split('\n'))
+
     errorlog = readlog(config.prefs['logs']['httpd'] + '/error.log')
-    e = len(errorlog.split('\n'))
-    data_b = 0
-    ips = []
-    files = []
-    useragents = []
+    total_errors = len(errorlog.splitlines())
+
+    logger.debug("Retrieved log data")
+
     errors = []
     notfound = []
     unprivileged = []
 
     logger.debug("Searching through access log")
-    for line in accesslog.split('\n'):
-        fields = re.search('^(\S*) .*GET (\/.*) HTTP/\d\.\d\" 200 (\d*) \"(.*)\".*\((.*)\;', line)
-        try:
-            ips.append(resolve(fields.group(1), fqdn=config.prefs['httpd']['resolve-domains']))
-            files.append(fields.group(2))
-            useragents.append(fields.group(5))
-            data_b += int(fields.group(3))
-        except Exception as error:
-            if type(error) is AttributeError: # this line is not an access log
-                pass
-            else:
-                logger.warning("Error processing httpd access log: " + str(error))
-                traceback.print_exc()
-    data_h = parsesize(data_b)
-
-    logger.info("httpd has transferred " + str(data_b) + " bytes in response to " + str(a) + " requests with " + str(e) + " errors")
-    if (a > 0):
-        logger.debug("Parsing request statistics (this might take a while)")
-        request_data = Data()
-        request_data.items = backticks(files)
-        request_data.orderbyfreq()
-        request_data.truncl(config.prefs['maxlist'])
-        request_data.subtitle = plural(" request", a)
-        section.append_data(request_data)
-    if (ips != None):
-        logger.debug("Parsing client statistics")
-        client_data = Data()
-        client_data.items = ips
-        client_data.orderbyfreq()
-        client_data.subtitle = plural(" client", str(len(ips)))
-        client_data.truncl(config.prefs['maxlist'])
-        section.append_data(client_data)
-    if (useragents != None):
-        logger.debug("Parsing user agent statistics")
-        ua_data = Data()
-        ua_data.items = useragents
-        ua_data.orderbyfreq()
-        n_ua = str(len(ua_data.items))
-        ua_data.truncl(config.prefs['maxlist'])
-        ua_data.subtitle = plural(" user agent", n_ua)
-        section.append_data(ua_data)
-
-    section.append_data(Data(data_h + " transferred"))
-    section.append_data(Data(plural(" error", e)))
+
+    accesses = []
+
+    for line in accesslog.splitlines():
+        if re.search(ACCESS_REGEX, line): # skip lines AccessLine cannot parse
+            accesses.append(AccessLine(line))
+
+    total_requests = len(accesses)
+
+    section.append_data(Data("Total of " + plural("request", total_requests)))
+    section.append_data(Data(plural("error", total_errors)))
+
+    size = Data()
+    size.subtitle = "Transferred " + parsesize(sum([ac.bytes for ac in accesses]))
+    section.append_data(size)
+
+    clients = Data()
+    clients.items = [resolve(ac.client, fqdn=config.prefs['httpd']['resolve-domains']) for ac in accesses]
+    clients.orderbyfreq()
+    clients.subtitle = "Received requests from " + plural("client", len(clients.items))
+    clients.truncl(config.prefs['maxlist'])
+    section.append_data(clients)
+
+    files = Data()
+    files.items = [ac.file for ac in accesses]
+    files.orderbyfreq()
+    files.subtitle = plural("file", len(files.items)) + " requested"
+    files.truncl(config.prefs['maxlist'])
+    section.append_data(files)
+
+    useragents = Data()
+    useragents.items = [ac.useragent for ac in accesses]
+    useragents.orderbyfreq()
+    useragents.subtitle = plural("user agent", len(useragents.items))
+    useragents.truncl(config.prefs['maxlist'])
+    section.append_data(useragents)
+
+    logger.info("httpd has received " + str(total_requests) + " requests with " + str(total_errors) + " errors")
+    logger.info("Finished httpd section")
     return section
diff --git a/logparse/parsers/journaltest.py b/logparse/parsers/journaltest.py
new file mode 100644
index 0000000..2d0016f
--- /dev/null
+++ b/logparse/parsers/journaltest.py
@@ -0,0 +1,11 @@
+from ..formatting import *
+from .. import config, util
+
+import logging
+logger = logging.getLogger(__name__)
+
+def parse_log():
+
+    parser = util.JournalParser()
+    parser.parse()
+
diff --git a/logparse/parsers/load_parsers.py b/logparse/parsers/load_parsers.py
index 0dc291a..85ad141 100644
--- a/logparse/parsers/load_parsers.py
+++ b/logparse/parsers/load_parsers.py
@@ -14,7 +14,8 @@ from typing import NamedTuple
 
 parser_dir = "/usr/share/logparse/"
 main_module = "__init__"
-default_parsers = ["cron", "httpd", "mem", "postfix", "smbd", "sshd", "sudo", "sysinfo", "temperature", "zfs"]
+default_parsers = ["cron-journald", "httpd", "mem", "postfix", "smbd", "sshd-journald", "sudo", "sysinfo", "temperature", "zfs"]
+deprecated_parsers = ["sshd", "cron"]
 
 import logging
 logger = logging.getLogger(__name__)
@@ -46,6 +47,9 @@ def search(name):
     if name in default_parsers:
         logger.debug("Found parser {0} in default modules".format(name))
         return Parser('.'.join(__name__.split('.')[:-1] + [name]))
+    elif name in deprecated_parsers:
+        logger.debug("Found parser {0} in deprecated modules".format(name))
+        return Parser('.'.join(__name__.split('.')[:-1] + [name]))
     else:
         return None
 
diff --git a/logparse/parsers/mem.py b/logparse/parsers/mem.py
index b6f8f8d..20baa85 100644
--- a/logparse/parsers/mem.py
+++ b/logparse/parsers/mem.py
@@ -23,25 +23,17 @@ def parse_log():
     table.add_row(Row([Column("Installed"), Column(parsesize(ram_b))]))
 
     raw_mem = util.readlog(config.prefs['logs']['meminfo'])
-    total_regex = re.compile("(MemTotal:\s*| kB)+")
-    free_regex = re.compile("MemFree:\s*")
+    line_regex = re.compile("^Mem(\w+):\s*(\d*)\s*kB$")
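+    # Each line of /proc/meminfo looks like e.g. "MemTotal:    8164532 kB",
+    # from which line_regex captures e.g. ("Total", "8164532")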
for {0}".format(processor)) -# if len(proc_data.items) > 0: -# section.append_data(proc_data) -# else: -# logger.warning("Failed to find processor data") table.align_column(0, "right") section.append_table(table) diff --git a/logparse/parsers/sshd-journald.py b/logparse/parsers/sshd-journald.py new file mode 100644 index 0000000..8b58b0c --- /dev/null +++ b/logparse/parsers/sshd-journald.py @@ -0,0 +1,77 @@ +# +# sshd.py +# +# Find number of ssh logins and authorised users +# + +import re +from systemd import journal + +from logparse.formatting import * +from logparse.util import resolve +from logparse import config + +import logging +logger = logging.getLogger(__name__) + +def parse_log(): + + logger.debug("Starting sshd section") + section = Section("ssh") + + j = journal.Reader() + j.this_boot() + j.log_level(journal.LOG_DEBUG) + j.add_match(_COMM="sshd") + + messages = [entry["MESSAGE"] for entry in j if "MESSAGE" in entry] + + login_data = Data("successful", []) + invalid_data = Data("invalid", []) + failed_data = Data("failed", []) + + for msg in messages: + + if "Accepted publickey" in msg: + entry = re.search('^.*publickey\sfor\s(\w*)\sfrom\s(\S*)', msg) # [('user', 'ip')] + user = entry.group(1) + ip = entry.group(2) + + userhost = user + '@' + resolve(ip, fqdn=config.prefs['sshd']['resolve-domains']) + login_data.items.append(userhost) + + elif "Connection closed by authenticating user root" in msg: + entry = re.search('^.*Connection closed by authenticating user (\S+) (\S+)', msg) # [('user', 'ip')] + user = entry.group(1) + ip = entry.group(2) + + userhost = user + '@' + resolve(ip, fqdn=config.prefs['sshd']['resolve-domains']) + failed_data.items.append(userhost) + + elif "Invalid user" in msg: + entry = re.search('^.*Invalid user (\S+) from (\S+).*', msg) # [('user', 'ip')] + user = entry.group(1) + ip = entry.group(2) + + userhost = user + '@' + resolve(ip, fqdn=config.prefs['sshd']['resolve-domains']) + invalid_data.items.append(userhost) + + login_data.subtitle = plural("successful login", len(login_data.items)) + " from" + login_data.orderbyfreq() + login_data.truncl(config.prefs['maxlist']) + + invalid_data.subtitle = plural("attempted login", len(invalid_data.items)) + invalid_data.orderbyfreq() + invalid_data.subtitle += plural(" from invalid user", len(invalid_data.items), False) + invalid_data.truncl(config.prefs['maxlist']) + + failed_data.subtitle = plural("failed login", len(failed_data.items)) + " from" + failed_data.orderbyfreq() + failed_data.truncl(config.prefs['maxlist']) + + section.append_data(login_data) + section.append_data(invalid_data) + section.append_data(failed_data) + + logger.info("Finished sshd section") + return section diff --git a/logparse/parsers/sshd.py b/logparse/parsers/sshd.py index f233a84..18b1799 100644 --- a/logparse/parsers/sshd.py +++ b/logparse/parsers/sshd.py @@ -1,7 +1,11 @@ # -# sshd.py +# sshd_auth.py # -# Find number of ssh logins and authorised users +# Find number of ssh logins and authorised users (uses /var/log/auth.log) +# +# NOTE: This file is now deprecated in favour of the newer journald mechanism +# used in sshd-journald.py. This parser is still functional but is slower and +# has less features. Please switch over if possible. # import re @@ -14,15 +18,26 @@ import logging logger = logging.getLogger(__name__) def parse_log(): + + logger.warning("NOTE: This sshd parser is now deprecated. 
+    login_data = Data("successful", [])
+    invalid_data = Data("invalid", [])
+    failed_data = Data("failed", [])
+
+    for msg in messages:
+
+        if "Accepted publickey" in msg:
+            entry = re.search('^.*publickey\sfor\s(\w*)\sfrom\s(\S*)', msg) # [('user', 'ip')]
+            user = entry.group(1)
+            ip = entry.group(2)
+
+            userhost = user + '@' + resolve(ip, fqdn=config.prefs['sshd']['resolve-domains'])
+            login_data.items.append(userhost)
+
+        elif "Connection closed by authenticating user root" in msg:
+            entry = re.search('^.*Connection closed by authenticating user (\S+) (\S+)', msg) # [('user', 'ip')]
+            user = entry.group(1)
+            ip = entry.group(2)
+
+            userhost = user + '@' + resolve(ip, fqdn=config.prefs['sshd']['resolve-domains'])
+            failed_data.items.append(userhost)
+
+        elif "Invalid user" in msg:
+            entry = re.search('^.*Invalid user (\S+) from (\S+).*', msg) # [('user', 'ip')]
+            user = entry.group(1)
+            ip = entry.group(2)
+
+            userhost = user + '@' + resolve(ip, fqdn=config.prefs['sshd']['resolve-domains'])
+            invalid_data.items.append(userhost)
+
+    login_data.subtitle = plural("successful login", len(login_data.items)) + " from"
+    login_data.orderbyfreq()
+    login_data.truncl(config.prefs['maxlist'])
+
+    invalid_data.subtitle = plural("attempted login", len(invalid_data.items))
+    invalid_data.orderbyfreq()
+    invalid_data.subtitle += plural(" from invalid user", len(invalid_data.items), False)
+    invalid_data.truncl(config.prefs['maxlist'])
+
+    failed_data.subtitle = plural("failed login", len(failed_data.items)) + " from"
+    failed_data.orderbyfreq()
+    failed_data.truncl(config.prefs['maxlist'])
+
+    section.append_data(login_data)
+    section.append_data(invalid_data)
+    section.append_data(failed_data)
+
+    logger.info("Finished sshd section")
+    return section
diff --git a/logparse/parsers/sshd.py b/logparse/parsers/sshd.py
index f233a84..18b1799 100644
--- a/logparse/parsers/sshd.py
+++ b/logparse/parsers/sshd.py
@@ -1,7 +1,11 @@
 #
 # sshd.py
 #
-# Find number of ssh logins and authorised users
+# Find number of ssh logins and authorised users (uses /var/log/auth.log)
+#
+# NOTE: This file is now deprecated in favour of the newer journald mechanism
+# used in sshd-journald.py. This parser is still functional but is slower and
+# has fewer features. Please switch over if possible.
 #
 
 import re
@@ -14,15 +18,23 @@ import logging
 logger = logging.getLogger(__name__)
 
 def parse_log():
+
+    logger.warning("NOTE: This sshd parser is now deprecated. Please use sshd-journald if possible.")
+
     logger.debug("Starting sshd section")
     section = Section("ssh")
-    logger.debug("Searching for matches in {0}".format(config.prefs['logs']['auth']))
-    matches = re.findall('.*sshd.*Accepted publickey for .* from .*', readlog(config.prefs['logs']['auth'])) # get all logins
-    logger.debug("Finished searching for logins")
+
+    logger.debug("Searching for matches in {0}".format(config.prefs['logs']['auth']))
+    authlog = readlog(config.prefs['logs']['auth'])
+
+    matches = re.findall('.*sshd.*Accepted publickey for .* from .*', authlog) # get all logins
+    invalid_matches = re.findall(".*sshd.*Invalid user .* from .*", authlog)
+    root_matches = re.findall("Disconnected from authenticating user root", authlog)
+    logger.debug("Finished searching for logins")
 
     users = [] # list of users with format [username, number of logins] for each item
     data = []
-    num = sum(1 for x in matches) # total number of logins
+    num = len(matches) # total number of logins
 
     for match in matches:
         entry = re.search('^.*publickey\sfor\s(\w*)\sfrom\s(\S*)', match) # [('user', 'ip')]
@@ -42,5 +54,32 @@
     auth_data.truncl(config.prefs['maxlist'])
     logger.debug("Found " + str(len(matches)) + " ssh logins for users " + str(data))
     section.append_data(auth_data)
+
+    invalid_users = []
+    for match in invalid_matches:
+        entry = re.search('^.*Invalid user (\S+) from (\S+).*', match) # [('user', 'ip')]
+
+        try:
+            user = entry.group(1)
+            ip = entry.group(2)
+        except AttributeError: # blank user field
+            continue
+
+        userhost = user + '@' + ip
+        invalid_users.append(userhost)
+    logger.debug("Parsed list of invalid users")
+    invalid_data = Data(subtitle=plural("attempted login", len(invalid_matches)) + " from " + plural("invalid user", len(invalid_users), print_quantity=False), items=invalid_users)
+    if (len(invalid_data.items) == 1): # if only one user, do not display no of logins for this user
+        logger.debug("Found " + str(len(invalid_matches)) + " SSH login attempts for invalid user " + invalid_users[0])
+        invalid_data.subtitle += ' ' + invalid_data.items[0]
+    invalid_data.orderbyfreq()
+    invalid_data.truncl(config.prefs['maxlist'])
+    logger.debug("Found " + str(len(invalid_matches)) + " SSH login attempts for invalid users " + str(data))
+    section.append_data(invalid_data)
+
+    logger.debug("Found {0} attempted logins for root".format(str(len(root_matches))))
+
+    section.append_data(Data(subtitle=plural("attempted login", len(root_matches)) + " for root"))
+
     logger.info("Finished sshd section")
     return section
diff --git a/logparse/util.py b/logparse/util.py
index 1a1cca5..2f30637 100644
--- a/logparse/util.py
+++ b/logparse/util.py
@@ -8,6 +8,8 @@ import re
 import os
 import socket
 import inspect
+from systemd import journal
+from datetime import datetime, timedelta
 
 import logging
 logger = logging.getLogger(__name__)
@@ -29,7 +31,7 @@ def getlocaldomain(): # get the parent fqdn of current server
     else:
         return domain[-1]
 
-def resolve(ip, fqdn = 'host-only'): # try to resolve an ip to hostname
+def resolve(ip, fqdn=None): # try to resolve an ip to hostname
     # Possible values for fqdn:
     #   fqdn          show full hostname and domain
     #   fqdn-implicit show hostname and domain unless local
-- 
2.47.1