From: Andrew Lorimer <andrew@lorimer.id.au>
Date: Wed, 28 Aug 2019 06:41:18 +0000 (+1000)
Subject: update parsers to new output model
X-Git-Url: https://git.lorimer.id.au/logparse.git/diff_plain/94973e5bca3633da737c3a2f2295ba9fcfbec646

update parsers to new output model
---

diff --git a/logparse/formatting.py b/logparse/formatting.py
index d7a9b4d..b2670f8 100644
--- a/logparse/formatting.py
+++ b/logparse/formatting.py
@@ -3,8 +3,8 @@
 #   
 #   This file contains global functions for formatting and printing data. This
 #   file should be imported into individual log-parsing scripts located in
-#   logs/*. Data is all formatted in HTML. Writing to disk and/or emailng data
-#   is left to __main__.py.
+#   logs/*. Data is formatted in HTML or plaintext. Writing to disk and/or
+#   emailing data is left to __main__.py.
 #
 
 import os
@@ -30,6 +30,9 @@ LINECHARS_DOUBLE = ['â', 'â']
 LINECHARS_SINGLE = ['â', 'â']
 
 class Output:
+    """
+    Base class for a data processor. 
+    """
     
     def __init__(self):
         self.content = ""
@@ -50,6 +53,9 @@ class Output:
 
 
 class PlaintextOutput(Output):
+    """
+    Processes & outputs data in a plaintext form which can be read with cat or plaintext email.
+    """
 
     def __init__(self, linewidth=80):
         self.content = ""
@@ -110,6 +116,10 @@ class PlaintextOutput(Output):
 
 
 class HtmlOutput(Output):
+    """
+    Process and output data in HTML format.
+    All HTML formatting functions now reside in this class to differentiate them from plaintext.
+    """
 
     def __init__(self):
         self.content = ""
@@ -123,13 +133,13 @@ class HtmlOutput(Output):
         init_varfilter()
         headercontent = Template(open(template, 'r').read())
         self.append(headercontent.safe_substitute(varsubst))
-        self.append(opentag('div', id='main'))
+        self.append(self.opentag('div', id='main'))
 
     def append_footer(self):
         self.append(closetag('div') + closetag('body') + closetag('html'))
 
     def append_section(self, section):
-        self.append(opentag('div', 1, section.title, 'section'))
+        self.append(self.opentag('div', 1, section.title, 'section'))
         self.append(self._gen_title(section.title))
         for data in section.data:
             self.append(self._fmt_data(data.subtitle, data.items))
@@ -158,7 +168,7 @@ class HtmlOutput(Output):
             else:
                 output = ""
                 output += tag('p', 0, subtitle)
-                output += opentag('ul', 1)
+                output += self.opentag('ul', 1)
                 coderegex = re.compile('`(.*)`')
                 for datum in data:
                     if datum == "" or datum == None:
@@ -168,8 +178,37 @@ class HtmlOutput(Output):
                 output += closetag('ul', 1)
                 return output
 
+    def opentag(self, tag, block = 0, id = None, cl = None):
+        """
+        Return an HTML opening tag for element `tag`, with optional `id` and
+        class (`cl`) attributes. A truthy `block` wraps the tag in newlines.
+        """
+        newline = '\n' if block else ''
+        attrs = ""
+        # attributes are always emitted in the same order: id, then class
+        if id is not None:
+            attrs += " id='" + id + "'"
+        if cl is not None:
+            attrs += " class='" + cl + "'"
+        return newline + '<' + tag + attrs + '>' + newline
+
+    def closetag(self, tag, block = 0):
+        """Return an HTML closing tag for `tag`, wrapped in newlines unless block == 0."""
+        if (block != 0):
+            return "\n</" + tag + ">\n"
+        return "</" + tag + ">"
+
+    def tag(self, tag, block = 0, content = ""):
+        """Return `content` enclosed in an opening and a closing `tag` element."""
+        opening, closing = self.opentag(tag, block), self.closetag(tag, block)
+        return opening + content + closing
+
+
 
 class Section:
+    """
+    Each parser should output a Section() which contains the title and returned data.
+    """
 
     def __init__(self, title):
         self.title = title
@@ -179,13 +218,34 @@ class Section:
         self.data.append(data)
 
 class Data:
+    """
+    Each section (parser) can have one or more Data() objects: an optional subtitle plus a list of string items (essentially a glorified list).
+    """
     
-    def __init__(self, subtitle, items=None):
+    def __init__(self, subtitle=None, items=None):
         self.subtitle = subtitle
-        self.items = items 
+        self.items = [] if items is None else items   # new list per instance; a `[]` default would be shared by every Data()
 
+    def truncl(self, limit):      # truncate items to `limit` entries, appending a "+ n more" marker when anything was cut
+        if (len(self.items) > limit):
+            more = str(len(self.items) - limit)
+            self.items = self.items[:limit]
+            self.items.append("+ " + more + " more")
+
+    def orderbyfreq(self, l=None):     # order a list (default: self.items) by the frequency of its elements, remove duplicates, store result in self.items
+        temp_l = self.items[:] if l is None else l[:]   # parsers call this with no argument after assigning self.items
+        l = list(set(temp_l))
+        l = [[i, temp_l.count(i)] for i in l]   # add count of each element
+        l.sort(key=lambda x: x[1]) # sort by the count computed above
+        l = [i[0] + ' (' + str(i[1]) + ')' for i in l]  # put element and count into string
+        l = l[::-1]     # reverse so the most frequent element comes first
+        self.items = l 
+
 
 class PlaintextLine:
+    """
+    Draw a horizontal line for plain text format, with optional padding/styling
+    """
 
     def __init__(self, linewidth=80, double=True, vpadding=1, hpadding=""):
         self.linewidth = linewidth
@@ -198,6 +258,9 @@ class PlaintextLine:
         return "\n" * self.vpadding + self.hpadding +  line * (self.linewidth - 2 * len(self.hpadding)) + self.hpadding + "\n" * self.vpadding
 
 class PlaintextBox:
+    """
+    Draw a rectangular box around text, with customisable padding/size/style
+    """
 
     def __init__(self, content="", double=True, fullwidth=True, linewidth=80, hpadding="\t", vpadding=1):
         self.content = content
@@ -285,52 +348,12 @@ def writetitle(title):  # write title for a section
     logger.debug("Writing title for " + title)
     return tag('h2', 0, title)
 
-def opentag(tag, block = 0, id = None, cl = None):   # write html opening tag
-    output = ""
-    if (block):
-        output += '\n'
-    output += '<' + tag
-    if (id != None):
-        output += " id='" + id + "'"
-    if (cl != None):
-        output += " class='" + cl + "'"
-    output += '>'
-    if (block):
-        output += '\n'
-    return output
-
-def closetag(tag, block = 0):  # write html closing tag
-    if (block == 0):
-        return "</" + tag + ">"
-    else:
-        return "\n</" + tag + ">\n"
-
-def tag(tag, block = 0, content = ""):  # write html opening tag, content, and html closing tag
-    o = opentag(tag, block)
-    c = closetag(tag, block)
-    return o + content + c
-
-def orderbyfreq(l):     # order a list by the frequency of its elements and remove duplicates
-    temp_l = l[:]
-    l = list(set(l))
-    l = [[i, temp_l.count(i)] for i in l]   # add count of each element
-    l.sort(key=lambda x:temp_l.count(x[0])) # sort by count
-    l = [i[0] + ' (' + str(i[1]) + ')' for i in l]  # put element and count into string
-    l = l[::-1]     # reverse
-    return l
-
 def addtag(l, tag):  # add prefix and suffix tags to each item in a list
     l2 = ['<' + tag + '>' + i + '</' + tag + '>' for i in l]
     return l2
 
-def truncl(input, limit):      # truncate list
-    if (len(input) > limit):
-        more = str(len(input) - limit)
-        output = input[:limit]
-        output.append("+ " + more + " more")
-        return(output)
-    else:
-        return(input)
+def backticks(l):   # return a new list with every string of l wrapped in backticks
+    return list(map(lambda x: "`" + x + "`", l))
 
 def plural(noun, quantity): # return "1 noun" or "n nouns"
     if (quantity == 1):
@@ -349,26 +372,3 @@ def fsubject(template): # Replace variables in the title template provided in co
     r = varpattern.sub(lambda m: varfilter[re.escape(m.group(0))], template)
     logger.debug("Returning subject line " + r)
     return r
-
-def writedata(subtitle, data = None):   # write title and data
-    if (subtitle == ""):
-        logger.warning("No subtitle provided.. skipping section")
-        return
-
-    if (data == None or len(data) == 0):
-        logger.debug("No data provided.. just printing subtitle")
-        return tag('p', 0, subtitle)
-    else:
-        logger.debug("Received data " + str(data))
-        subtitle += ':'
-        if (len(data) == 1):
-            return tag('p', 0, subtitle + ' ' + data[0])
-        else:
-            output = ""
-            output += tag('p', 0, subtitle)
-            output += opentag('ul', 1)
-            for datum in data:
-                output += tag('li', 0, datum)
-            output += closetag('ul', 1)
-            return output
-
diff --git a/logparse/parsers/httpd.py b/logparse/parsers/httpd.py
index 221f982..7175ea8 100644
--- a/logparse/parsers/httpd.py
+++ b/logparse/parsers/httpd.py
@@ -18,9 +18,8 @@ import logging
 logger = logging.getLogger(__name__)
 
 def parse_log():
-    output = ''
     logger.debug("Starting httpd section")
-    output += opentag('div', 1, 'httpd', 'section')
+    section = Section("httpd")
     accesslog = readlog(config.prefs['logs']['httpd'] + '/access.log')
     a = len(accesslog.split('\n'))
     errorlog = readlog(config.prefs['logs']['httpd'] + '/error.log')
@@ -48,33 +47,34 @@ def parse_log():
                 logger.warning("Error processing httpd access log: " + str(error))
                 traceback.print_exc()
     data_h = parsesize(data_b)
-    output += writetitle("apache")
 
     logger.info("httpd has transferred " + str(data_b) + " bytes in response to " + str(a) + " requests with " + str(e) + " errors")
     if (a > 0):
         logger.debug("Parsing request statistics (this might take a while)")
-        files = addtag(files, 'code')
-        files = orderbyfreq(files)
-        files = truncl(files, config.prefs['maxlist'])
-        output += writedata(plural(" request", a), files)
+        request_data = Data()
+        request_data.items = backticks(files)
+        request_data.orderbyfreq()
+        request_data.truncl(config.prefs['maxlist'])
+        request_data.subtitle = plural(" request", a)
+        section.append_data(request_data)
     if (ips != None):
         logger.debug("Parsing client statistics")
-        ips = addtag(ips, 'code')
-        ips = orderbyfreq(ips)
-        n_ip = str(len(ips))
-        ips = truncl(ips, config.prefs['maxlist'])
-        output += writedata(plural(" client", n_ip), ips)
+        client_data = Data()
+        client_data.orderbyfreq(ips)    # module-level orderbyfreq() no longer exists; the method stores its result in .items
+        client_data.subtitle = plural(" client", str(len(client_data.items)))   # count unique clients, before truncation
+        client_data.truncl(config.prefs['maxlist'])
+        section.append_data(client_data)
     if (useragents != None):
         logger.debug("Parsing user agent statistics")
-        useragents = addtag(useragents, 'code')
-        useragents = orderbyfreq(useragents)
-        n_ua = str(len(useragents))
-        useragents = truncl(useragents, config.prefs['maxlist'])
-        output += writedata(plural(" device", n_ua), useragents)
+        ua_data = Data()
+        ua_data.orderbyfreq(useragents)    # module-level orderbyfreq() no longer exists; the method stores its result in .items
+        n_ua = str(len(ua_data.items))
+        ua_data.truncl(config.prefs['maxlist'])
+        ua_data.subtitle = plural(" user agent", n_ua)
+        section.append_data(ua_data)    # append the user agent data, not client_data again
 
-    output += writedata(data_h + " transferred")
-    output += writedata(plural(" error", e))
+    section.append_data(Data(data_h + " transferred"))
+    section.append_data(Data(plural(" error", e)))
 
-    output += closetag('div', 1)
     logger.info("Finished httpd section")
-    return output
+    return section
diff --git a/logparse/parsers/postfix.py b/logparse/parsers/postfix.py
index 90190f3..bee1809 100644
--- a/logparse/parsers/postfix.py
+++ b/logparse/parsers/postfix.py
@@ -14,10 +14,8 @@ import logging
 logger = logging.getLogger(__name__)
 
 def parse_log():
-    output = ''
+    section = Section("postfix")
     logger.debug("Starting postfix section")
-    output += opentag('div', 1, 'postfix', 'section')
-    output += writetitle("postfix")
     logger.debug("Searching through postfix logs")
     messages = re.findall('.*from\=<(.*)>, size\=(\d*),.*\n.*to=<(.*)>', readlog(config.prefs['logs']['postfix']))
     r = []
@@ -34,17 +32,18 @@ def parse_log():
 
     logger.debug("Analysing message recipients")
     if (len(r) > 0):
+        rec_data = Data(subtitle=n + " messages sent to")   # heading for the multi-recipient list; overwritten below for a single recipient
         s = list(set(r))    # unique recipients
         if (len(s) > 1):
-            r = orderbyfreq(r)
-            r = truncl(r, config.prefs['maxlist'])
-            output += writedata(n + " messages sent to", r)
+            rec_data.items = r
+            rec_data.orderbyfreq()
+            rec_data.truncl(config.prefs['maxlist'])
         else:
-            output += writedata(n + " messages sent to " + r[0])
+            rec_data.subtitle = n + " messages sent to " + r[0]
+        section.append_data(rec_data)
     else:
-        output += writedata(n + " messages sent")
+        section.append_data(Data(subtitle=n + " messages sent"))
     logger.info("Found {0} messages sent to {1} recipients".format(n, str(len(r))))
-    output += writedata("total of " + size)
-    output += closetag('div', 1)
+    section.append_data(Data(subtitle="total of " + size))
     logger.info("Finished postfix section")
-    return output
+    return section 
diff --git a/logparse/parsers/smbd.py b/logparse/parsers/smbd.py
index 1bf06bd..efe48cd 100644
--- a/logparse/parsers/smbd.py
+++ b/logparse/parsers/smbd.py
@@ -16,9 +16,8 @@ import logging
 logger = logging.getLogger(__name__)
 
 def parse_log():
-    output = ''
     logger.debug("Starting smbd section")
-    output += opentag('div', 1, 'smbd', 'section')
+    section = Section("smbd")
     files = glob.glob(config.prefs['logs']['smb'] + "/log.*[!\.gz][!\.old]")    # find list of logfiles
     # for f in files:
 
@@ -64,16 +63,15 @@ def parse_log():
             # else:
             #     sigma_auths[exists[0]][1] += 1
             n_auths += 1
-    output += writetitle("samba")
-    subtitle = plural("login", n_auths) + " from"
+    auth_data = Data(subtitle=plural("login", n_auths) + " from")
     if (len(sigma_auths) == 1):             # if only one user, do not display no of logins for this user
-        subtitle += ' ' + sigma_auths[0][0]
-        output += writedata(subtitle)
+        # single user: the name goes into the subtitle; auth_data is appended once, after the if/else
+        auth_data.subtitle += ' ' + sigma_auths[0][0]
     else:       # multiple users
-        sigma_auths = orderbyfreq(sigma_auths)
-        sigma_auths = truncl(sigma_auths, config.prefs['maxlist'])
+        auth_data.items = sigma_auths
+        auth_data.orderbyfreq()
+        auth_data.truncl(config.prefs['maxlist'])
         logger.debug("Found {0} samba logins".format(str(n_auths)))
-        output += writedata(subtitle, sigma_auths)
-    output += closetag('div', 1)
+    section.append_data(auth_data)
     logger.info("Finished smbd section")
-    return output
+    return section
diff --git a/logparse/parsers/sshd.py b/logparse/parsers/sshd.py
index 524312e..38b3064 100644
--- a/logparse/parsers/sshd.py
+++ b/logparse/parsers/sshd.py
@@ -14,9 +14,8 @@ import logging
 logger = logging.getLogger(__name__)
 
 def parse_log():
-    output = ''
     logger.debug("Starting sshd section")
-    output += opentag('div', 1, 'sshd', 'section')
+    section = Section("ssh")
     logger.debug("Searching for matches in {0}".format(config.prefs['logs']['auth']))
     matches = re.findall('.*sshd.*Accepted publickey for .* from .*', readlog(config.prefs['logs']['auth']))    # get all logins
     logger.debug("Finished searching for logins")
@@ -38,20 +37,17 @@ def parse_log():
             users[exists[0]][1] += 1
     logger.debug("Parsed list of authorised users")
 
-    output += writetitle('sshd')
-    subtitle = plural('login', num) + ' from'
+    auth_data = Data(subtitle=plural('login', num) + ' from')
+
     if (len(users) == 1):             # if only one user, do not display no of logins for this user
         logger.debug("found " + str(len(matches)) + " ssh logins for user " + users[0][0])
-        subtitle += ' ' + users[0][0]
-        output += writedata(subtitle)
+        auth_data.subtitle += ' ' + users[0][0]
     else:
         for user in users:
-            data.append(user[0] + ' (' + str(user[1]) + ')')
-            if len(data) > config.prefs['maxlist']:     # if there are lots of users, truncate them
-                data.append('+ ' + str(len(users) - config.prefs['maxlist'] - 1) + " more")
-                break
+            auth_data.items.append(user[0] + ' (' + str(user[1]) + ')')
+        # items already carry their login counts, so no orderbyfreq(); truncate once after the loop
+        auth_data.truncl(config.prefs['maxlist'])
+        logger.debug("found " + str(len(matches)) + " ssh logins for users " + str(auth_data.items))
-        output += writedata(subtitle, data)
-    output += closetag('div', 1)
+    section.append_data(auth_data)
     logger.info("Finished sshd section")
-    return output
+    return section
diff --git a/logparse/parsers/sudo.py b/logparse/parsers/sudo.py
index ef74ec2..87d4be7 100644
--- a/logparse/parsers/sudo.py
+++ b/logparse/parsers/sudo.py
@@ -14,9 +14,8 @@ import logging
 logger = logging.getLogger(__name__)
 
 def parse_log():
-    output = ''
     logger.debug("Starting sudo section")
-    output += opentag('div', 1, 'sudo', 'section')
+    section = Section("sudo")
     logger.debug("Searching for matches in {0}".format(config.prefs['logs']['auth']))
     umatches = re.findall('.*sudo:session\): session opened.*', readlog(config.prefs['logs']['auth']))
     num = sum(1 for line in umatches)    # total number of sessions
@@ -35,22 +34,25 @@ def parse_log():
         commands.append(cmd)
     logger.debug("Finished parsing sudo sessions")
 
-    output += writetitle("sudo")
-    subtitle = plural("sudo session", num) + " for"
+    auth_data = Data(subtitle=plural("sudo session", num) + " for")
+
     if (len(users) == 1):
         logger.debug("found " + str(num) + " sudo session(s) for user " + str(users[0]))
-        subtitle += ' ' + users[0][0]
-        output += writedata(subtitle)
+        auth_data.subtitle += ' ' + users[0][0]
     else:
         for user in users:
-            data.append(user[0] + ' (' + str(user[1]) + ')')
+            auth_data.items.append(user[0] + ' (' + str(user[1]) + ')')
         logger.debug("found " + str(num) + " sudo sessions for users " + str(auth_data.items))
-        output += writedata(subtitle, data)
+    section.append_data(auth_data)
+
     if (len(commands) > 0):
-        commands = addtag(commands, 'code')
-        commands = orderbyfreq(commands)
-        commands = truncl(commands, config.prefs['maxcmd'])
-        output += writedata("top sudo commands", [c for c in commands])
-    output += closetag('div', 1)
-    return output
+        command_data = Data(subtitle="top sudo commands")
+        commands = backticks(commands)
+        command_data.items = commands
+        command_data.orderbyfreq()
+        command_data.truncl(config.prefs['maxcmd'])
+        section.append_data(command_data)
+
     logger.info("Finished sudo section")
+
+    return section
diff --git a/logparse/parsers/temperature.py b/logparse/parsers/temperature.py
index 3441893..2680a44 100644
--- a/logparse/parsers/temperature.py
+++ b/logparse/parsers/temperature.py
@@ -72,8 +72,7 @@ class HddtempClient:
 
 def parse_log():
     logger.debug("Starting temp section")
-    output = writetitle("temperatures")
-    output += opentag('div', 1, 'temp', 'section')
+    section = Section("temperatures")
 
     # cpu temp