From: Andrew Lorimer Date: Wed, 28 Aug 2019 06:41:18 +0000 (+1000) Subject: update parsers to new output model X-Git-Url: https://git.lorimer.id.au/logparse.git/diff_plain/94973e5bca3633da737c3a2f2295ba9fcfbec646 update parsers to new output model --- diff --git a/logparse/formatting.py b/logparse/formatting.py index d7a9b4d..b2670f8 100644 --- a/logparse/formatting.py +++ b/logparse/formatting.py @@ -3,8 +3,8 @@ # # This file contains global functions for formatting and printing data. This # file should be imported into individual log-parsing scripts located in -# logs/*. Data is all formatted in HTML. Writing to disk and/or emailng data -# is left to __main__.py. +# logs/*. Data is formatted in HTML or plaintext. Writing to disk and/or +# emailng data is left to __main__.py. # import os @@ -30,6 +30,9 @@ LINECHARS_DOUBLE = ['║', '═'] LINECHARS_SINGLE = ['│', '─'] class Output: + """ + Base class for a data processor. + """ def __init__(self): self.content = "" @@ -50,6 +53,9 @@ class Output: class PlaintextOutput(Output): + """ + Processes & outputs data in a plaintext form which can be read with cat or plaintext email. + """ def __init__(self, linewidth=80): self.content = "" @@ -110,6 +116,10 @@ class PlaintextOutput(Output): class HtmlOutput(Output): + """ + Process and output data in HTML format. + All HTML formatting functions now reside in this class to differentiate them from plaintext. + """ def __init__(self): self.content = "" @@ -123,13 +133,13 @@ class HtmlOutput(Output): init_varfilter() headercontent = Template(open(template, 'r').read()) self.append(headercontent.safe_substitute(varsubst)) - self.append(opentag('div', id='main')) + self.append(self.opentag('div', id='main')) def append_footer(self): self.append(closetag('div') + closetag('body') + closetag('html')) def append_section(self, section): - self.append(opentag('div', 1, section.title, 'section')) + self.append(self.opentag('div', 1, section.title, 'section')) self.append(self._gen_title(section.title)) for data in section.data: self.append(self._fmt_data(data.subtitle, data.items)) @@ -158,7 +168,7 @@ class HtmlOutput(Output): else: output = "" output += tag('p', 0, subtitle) - output += opentag('ul', 1) + output += self.opentag('ul', 1) coderegex = re.compile('`(.*)`') for datum in data: if datum == "" or datum == None: @@ -168,8 +178,37 @@ class HtmlOutput(Output): output += closetag('ul', 1) return output + def opentag(self, tag, block = 0, id = None, cl = None): # write html opening tag + output = "" + if (block): + output += '\n' + output += '<' + tag + if (id != None): + output += " id='" + id + "'" + if (cl != None): + output += " class='" + cl + "'" + output += '>' + if (block): + output += '\n' + return output + + def closetag(self, tag, block = 0): # write html closing tag + if (block == 0): + return "" + else: + return "\n\n" + + def tag(self, tag, block = 0, content = ""): # write html opening tag, content, and html closing tag + o = self.opentag(tag, block) + c = self.closetag(tag, block) + return o + content + c + + class Section: + """ + Each parser should output a Section() which contains the title and returned data. + """ def __init__(self, title): self.title = title @@ -179,13 +218,34 @@ class Section: self.data.append(data) class Data: + """ + Each section (parser) can have one or more Data() objects which are essentially glorified lists. + """ - def __init__(self, subtitle, items=None): + def __init__(self, subtitle=None, items=[]): self.subtitle = subtitle self.items = items + def truncl(self, limit): # truncate list + if (len(self.items) > limit): + more = str(len(self.items) - limit) + self.items = self.items[:limit] + self.items..append("+ " + more + " more") + + def orderbyfreq(self, l): # order a list by the frequency of its elements and remove duplicates + temp_l = l[:] + l = list(set(l)) + l = [[i, temp_l.count(i)] for i in l] # add count of each element + l.sort(key=lambda x:temp_l.count(x[0])) # sort by count + l = [i[0] + ' (' + str(i[1]) + ')' for i in l] # put element and count into string + l = l[::-1] # reverse + self.items = l + class PlaintextLine: + """ + Draw a horizontal line for plain text format, with optional padding/styling + """ def __init__(self, linewidth=80, double=True, vpadding=1, hpadding=""): self.linewidth = linewidth @@ -198,6 +258,9 @@ class PlaintextLine: return "\n" * self.vpadding + self.hpadding + line * (self.linewidth - 2 * len(self.hpadding)) + self.hpadding + "\n" * self.vpadding class PlaintextBox: + """ + Draw a rectangular box around text, with customisable padding/size/style + """ def __init__(self, content="", double=True, fullwidth=True, linewidth=80, hpadding="\t", vpadding=1): self.content = content @@ -285,52 +348,12 @@ def writetitle(title): # write title for a section logger.debug("Writing title for " + title) return tag('h2', 0, title) -def opentag(tag, block = 0, id = None, cl = None): # write html opening tag - output = "" - if (block): - output += '\n' - output += '<' + tag - if (id != None): - output += " id='" + id + "'" - if (cl != None): - output += " class='" + cl + "'" - output += '>' - if (block): - output += '\n' - return output - -def closetag(tag, block = 0): # write html closing tag - if (block == 0): - return "" - else: - return "\n\n" - -def tag(tag, block = 0, content = ""): # write html opening tag, content, and html closing tag - o = opentag(tag, block) - c = closetag(tag, block) - return o + content + c - -def orderbyfreq(l): # order a list by the frequency of its elements and remove duplicates - temp_l = l[:] - l = list(set(l)) - l = [[i, temp_l.count(i)] for i in l] # add count of each element - l.sort(key=lambda x:temp_l.count(x[0])) # sort by count - l = [i[0] + ' (' + str(i[1]) + ')' for i in l] # put element and count into string - l = l[::-1] # reverse - return l - def addtag(l, tag): # add prefix and suffix tags to each item in a list l2 = ['<' + tag + '>' + i + '' for i in l] return l2 -def truncl(input, limit): # truncate list - if (len(input) > limit): - more = str(len(input) - limit) - output = input[:limit] - output.append("+ " + more + " more") - return(output) - else: - return(input) +def backticks(l): + return ["`" + x + "`" for x in l] def plural(noun, quantity): # return "1 noun" or "n nouns" if (quantity == 1): @@ -349,26 +372,3 @@ def fsubject(template): # Replace variables in the title template provided in co r = varpattern.sub(lambda m: varfilter[re.escape(m.group(0))], template) logger.debug("Returning subject line " + r) return r - -def writedata(subtitle, data = None): # write title and data - if (subtitle == ""): - logger.warning("No subtitle provided.. skipping section") - return - - if (data == None or len(data) == 0): - logger.debug("No data provided.. just printing subtitle") - return tag('p', 0, subtitle) - else: - logger.debug("Received data " + str(data)) - subtitle += ':' - if (len(data) == 1): - return tag('p', 0, subtitle + ' ' + data[0]) - else: - output = "" - output += tag('p', 0, subtitle) - output += opentag('ul', 1) - for datum in data: - output += tag('li', 0, datum) - output += closetag('ul', 1) - return output - diff --git a/logparse/parsers/httpd.py b/logparse/parsers/httpd.py index 221f982..7175ea8 100644 --- a/logparse/parsers/httpd.py +++ b/logparse/parsers/httpd.py @@ -18,9 +18,8 @@ import logging logger = logging.getLogger(__name__) def parse_log(): - output = '' logger.debug("Starting httpd section") - output += opentag('div', 1, 'httpd', 'section') + section = Section("httpd") accesslog = readlog(config.prefs['logs']['httpd'] + '/access.log') a = len(accesslog.split('\n')) errorlog = readlog(config.prefs['logs']['httpd'] + '/error.log') @@ -48,33 +47,34 @@ def parse_log(): logger.warning("Error processing httpd access log: " + str(error)) traceback.print_exc() data_h = parsesize(data_b) - output += writetitle("apache") logger.info("httpd has transferred " + str(data_b) + " bytes in response to " + str(a) + " requests with " + str(e) + " errors") if (a > 0): logger.debug("Parsing request statistics (this might take a while)") - files = addtag(files, 'code') - files = orderbyfreq(files) - files = truncl(files, config.prefs['maxlist']) - output += writedata(plural(" request", a), files) + request_data = Data() + request_data.items = backticks(files) + request_data.orderbyfreq() + request_data.truncl(config.prefs['maxlist']) + request_data.subtitle = plural(" request", a) + section.append_data(request_data) if (ips != None): logger.debug("Parsing client statistics") - ips = addtag(ips, 'code') - ips = orderbyfreq(ips) - n_ip = str(len(ips)) - ips = truncl(ips, config.prefs['maxlist']) - output += writedata(plural(" client", n_ip), ips) + client_data = Data() + client_data.items = orderbyfreq(ips) + client_data.subtitlte = plural(" client", str(len(ips))) + client_data.truncl(config.prefs['maxlist']) + section.append_data(client_data) if (useragents != None): logger.debug("Parsing user agent statistics") - useragents = addtag(useragents, 'code') - useragents = orderbyfreq(useragents) - n_ua = str(len(useragents)) - useragents = truncl(useragents, config.prefs['maxlist']) - output += writedata(plural(" device", n_ua), useragents) + ua_data = Data() + ua_data.items = orderbyfreq(useragents) + n_ua = str(len(ua_data.items)) + ua_data.truncl(config.prefs['maxlist']) + ua_data.subtitle = plural(" user agent", n_ua) + section.append_data(client_data) - output += writedata(data_h + " transferred") - output += writedata(plural(" error", e)) + section.append_data(Data(data_h + " transferred")) + section.append_data(Data(plural(" error", e))) - output += closetag('div', 1) logger.info("Finished httpd section") - return output + return section diff --git a/logparse/parsers/postfix.py b/logparse/parsers/postfix.py index 90190f3..bee1809 100644 --- a/logparse/parsers/postfix.py +++ b/logparse/parsers/postfix.py @@ -14,10 +14,8 @@ import logging logger = logging.getLogger(__name__) def parse_log(): - output = '' + section = Section("postfix") logger.debug("Starting postfix section") - output += opentag('div', 1, 'postfix', 'section') - output += writetitle("postfix") logger.debug("Searching through postfix logs") messages = re.findall('.*from\=<(.*)>, size\=(\d*),.*\n.*to=<(.*)>', readlog(config.prefs['logs']['postfix'])) r = [] @@ -34,17 +32,18 @@ def parse_log(): logger.debug("Analysing message recipients") if (len(r) > 0): + rec_data = Data() s = list(set(r)) # unique recipients if (len(s) > 1): - r = orderbyfreq(r) - r = truncl(r, config.prefs['maxlist']) - output += writedata(n + " messages sent to", r) + rec_data.items = r + rec_data.orderbyfreq() + rec_data.truncl(config.prefs['maxlist']) else: - output += writedata(n + " messages sent to " + r[0]) + rec_data.subtitle = n + " messages sent to " + r[0] + section.append_data(rec_data) else: - output += writedata(n + " messages sent") + section.append_data(Data(subtitle=n + " messages sent"))) logger.info("Found {0} messages sent to {1} recipients".format(n, str(len(r)))) - output += writedata("total of " + size) - output += closetag('div', 1) + section.append_data(Data(subtitle="total of " + size)) logger.info("Finished postfix section") - return output + return section diff --git a/logparse/parsers/smbd.py b/logparse/parsers/smbd.py index 1bf06bd..efe48cd 100644 --- a/logparse/parsers/smbd.py +++ b/logparse/parsers/smbd.py @@ -16,9 +16,8 @@ import logging logger = logging.getLogger(__name__) def parse_log(): - output = '' logger.debug("Starting smbd section") - output += opentag('div', 1, 'smbd', 'section') + section = Section("smbd") files = glob.glob(config.prefs['logs']['smb'] + "/log.*[!\.gz][!\.old]") # find list of logfiles # for f in files: @@ -64,16 +63,15 @@ def parse_log(): # else: # sigma_auths[exists[0]][1] += 1 n_auths += 1 - output += writetitle("samba") - subtitle = plural("login", n_auths) + " from" + auth_data = Data(subtitle=plural("login", n_auths) + " from") if (len(sigma_auths) == 1): # if only one user, do not display no of logins for this user - subtitle += ' ' + sigma_auths[0][0] - output += writedata(subtitle) + auth_data.subtitle += ' ' + sigma_auths[0][0] + section.append_data(auth_data) else: # multiple users - sigma_auths = orderbyfreq(sigma_auths) - sigma_auths = truncl(sigma_auths, config.prefs['maxlist']) + auth_data.items = sigma_auths + auth_data.orderbyfreq() + auth_data.truncl(config.prefs['maxlist']) logger.debug("Found {0} samba logins".format(str(n_auths))) - output += writedata(subtitle, sigma_auths) - output += closetag('div', 1) + section.append_data(auth_data) logger.info("Finished smbd section") - return output + return section diff --git a/logparse/parsers/sshd.py b/logparse/parsers/sshd.py index 524312e..38b3064 100644 --- a/logparse/parsers/sshd.py +++ b/logparse/parsers/sshd.py @@ -14,9 +14,8 @@ import logging logger = logging.getLogger(__name__) def parse_log(): - output = '' logger.debug("Starting sshd section") - output += opentag('div', 1, 'sshd', 'section') + section = Section("ssh") logger.debug("Searching for matches in {0}".format(config.prefs['logs']['auth'])) matches = re.findall('.*sshd.*Accepted publickey for .* from .*', readlog(config.prefs['logs']['auth'])) # get all logins logger.debug("Finished searching for logins") @@ -38,20 +37,17 @@ def parse_log(): users[exists[0]][1] += 1 logger.debug("Parsed list of authorised users") - output += writetitle('sshd') - subtitle = plural('login', num) + ' from' + auth_data = Data(subtitle=plural('login', num) + ' from') + if (len(users) == 1): # if only one user, do not display no of logins for this user logger.debug("found " + str(len(matches)) + " ssh logins for user " + users[0][0]) - subtitle += ' ' + users[0][0] - output += writedata(subtitle) + auth_data.subtitle += ' ' + users[0][0] else: for user in users: - data.append(user[0] + ' (' + str(user[1]) + ')') - if len(data) > config.prefs['maxlist']: # if there are lots of users, truncate them - data.append('+ ' + str(len(users) - config.prefs['maxlist'] - 1) + " more") - break + auth_data.items.append(user[0] + ' (' + str(user[1]) + ')') + auth_data.orderbyfreq() + auth_data.truncl(config.prefs['maxlist']) logger.debug("found " + str(len(matches)) + " ssh logins for users " + str(data)) - output += writedata(subtitle, data) - output += closetag('div', 1) + section.append_data(auth_data) logger.info("Finished sshd section") - return output + return section diff --git a/logparse/parsers/sudo.py b/logparse/parsers/sudo.py index ef74ec2..87d4be7 100644 --- a/logparse/parsers/sudo.py +++ b/logparse/parsers/sudo.py @@ -14,9 +14,8 @@ import logging logger = logging.getLogger(__name__) def parse_log(): - output = '' logger.debug("Starting sudo section") - output += opentag('div', 1, 'sudo', 'section') + section = Section("sudo") logger.debug("Searching for matches in {0}".format(config.prefs['logs']['auth'])) umatches = re.findall('.*sudo:session\): session opened.*', readlog(config.prefs['logs']['auth'])) num = sum(1 for line in umatches) # total number of sessions @@ -35,22 +34,25 @@ def parse_log(): commands.append(cmd) logger.debug("Finished parsing sudo sessions") - output += writetitle("sudo") - subtitle = plural("sudo session", num) + " for" + auth_data = Data(subtitle=plural("sudo session", num) + " for") + if (len(users) == 1): logger.debug("found " + str(num) + " sudo session(s) for user " + str(users[0])) - subtitle += ' ' + users[0][0] - output += writedata(subtitle) + auth_data.subtitle += ' ' + users[0][0] else: for user in users: - data.append(user[0] + ' (' + str(user[1]) + ')') + auth_data.items.append(user[0] + ' (' + str(user[1]) + ')') logger.debug("found " + str(num) + " sudo sessions for users " + str(data)) - output += writedata(subtitle, data) + section.append_data(auth_data) + if (len(commands) > 0): - commands = addtag(commands, 'code') - commands = orderbyfreq(commands) - commands = truncl(commands, config.prefs['maxcmd']) - output += writedata("top sudo commands", [c for c in commands]) - output += closetag('div', 1) - return output + command_data = Data(subtitle="top sudo commands") + commands = backticks(commands) + command_data.items = commands + command_data.orderbyfreq() + command_data.truncl(config.prefs['maxcmd']) + section.append_data(command_data) + logger.info("Finished sudo section") + + return section diff --git a/logparse/parsers/temperature.py b/logparse/parsers/temperature.py index 3441893..2680a44 100644 --- a/logparse/parsers/temperature.py +++ b/logparse/parsers/temperature.py @@ -72,8 +72,7 @@ class HddtempClient: def parse_log(): logger.debug("Starting temp section") - output = writetitle("temperatures") - output += opentag('div', 1, 'temp', 'section') + section = Section("temperatures") # cpu temp