add docs
[logparse.git] / logparse / parsers / httpd.py
index d0e800c896f75a451768ff809657d2e454939842..b86f1c1bd5b4830788ee5b627e00a196af6d835d 100644 (file)
 
 import re
 
-from ..formatting import *
-from ..util import readlog, resolve
-from .. import config
-
-import logging
-logger = logging.getLogger(__name__)
-
-def parse_log():
-    logger.debug("Starting httpd section")
-    section = Section("httpd")
-    accesslog = readlog(config.prefs['logs']['httpd'] + '/access.log')
-    a = len(accesslog.split('\n'))
-    errorlog = readlog(config.prefs['logs']['httpd'] + '/error.log')
-    e = len(errorlog.split('\n'))
-    data_b = 0
-    ips = []
-    files = []
-    useragents = []
-    errors = []
-    notfound = []
-    unprivileged = []
-
-    logger.debug("Searching through access log")
-    for line in accesslog.split('\n'):
-        fields = re.search('^(\S*) .*GET (\/.*) HTTP/\d\.\d\" 200 (\d*) \"(.*)\".*\((.*)\;', line)
-        try:
-            ips.append(resolve(fields.group(1), fqdn=config.prefs['httpd']['resolve-domains']))
-            files.append(fields.group(2))
-            useragents.append(fields.group(5))
-            data_b += int(fields.group(3))
-        except Exception as error:
-            if type(error) is AttributeError: # this line is not an access log
-                pass
-            else:
-                logger.warning("Error processing httpd access log: " + str(error))
-                traceback.print_exc()
-    data_h = parsesize(data_b)
-
-    logger.info("httpd has transferred " + str(data_b) + " bytes in response to " + str(a) + " requests with " + str(e) + " errors")
-    if (a > 0):
-        logger.debug("Parsing request statistics (this might take a while)")
-        request_data = Data()
-        request_data.items = backticks(files)
-        request_data.orderbyfreq()
-        request_data.truncl(config.prefs['maxlist'])
-        request_data.subtitle = plural(" request", a)
-        section.append_data(request_data)
-    if (ips != None):
-        logger.debug("Parsing client statistics")
-        client_data = Data()
-        client_data.items = ips
-        client_data.orderbyfreq()
-        client_data.subtitle = plural(" client", str(len(ips)))
-        client_data.truncl(config.prefs['maxlist'])
-        section.append_data(client_data)
-    if (useragents != None):
-        logger.debug("Parsing user agent statistics")
-        ua_data = Data()
-        ua_data.items = useragents
-        ua_data.orderbyfreq()
-        n_ua = str(len(ua_data.items))
-        ua_data.truncl(config.prefs['maxlist'])
-        ua_data.subtitle = plural(" user agent", n_ua)
-        section.append_data(ua_data)
-
-    section.append_data(Data(data_h + " transferred"))
-    section.append_data(Data(plural(" error", e)))
-
-    logger.info("Finished httpd section")
-    return section
+from logparse.formatting import *
+from logparse.util import readlog, resolve
+from logparse import config
+from logparse.load_parsers import Parser
+
+# Pattern for one Apache access log line describing a GET request.
+# Capture groups, as consumed by AccessLine:
+#   1 = client (first whitespace-delimited token of the line)
+#   2 = requested path
+#   3 = three-digit HTTP status code
+#   4 = response size in bytes (may be empty, since it is matched by \d*)
+#   5 = user agent (contents of the last quoted field in the pattern)
+# Written as a raw string so that the regex escapes (\s, \S, \d, \/, \.) are
+# not interpreted as invalid string escape sequences. The pattern text is
+# exactly the same as the previous non-raw literal.
+ACCESS_REGEX = r'^\s*(\S+).*"GET (\S+) HTTP(?:\/\d\.\d)?" (\d{3}) (\d*) ".+" "(.*)"'
+
+class AccessLine(object):
+    """
+    One GET request parsed from a single line of the httpd access log.
+
+    Attributes set by the constructor:
+        line        the original, unparsed log line
+        client      first whitespace-delimited token of the line
+        file        path requested with GET
+        statuscode  HTTP status code, as an int
+        bytes       response size, as an int (0 if the size field is empty)
+        useragent   text captured from the last quoted field of the pattern
+    """
+
+    def __init__(self, line):
+        """
+        Parse `line` with ACCESS_REGEX and store the captured fields.
+
+        Raises ValueError if the line does not match the pattern, instead of
+        failing with an AttributeError on a missing match object.
+        """
+        self.line = line
+        fields = re.search(ACCESS_REGEX, line)
+        if fields is None:
+            raise ValueError("Not a recognised httpd access log line: " + repr(line))
+
+        self.client = fields.group(1)
+        self.file = fields.group(2)
+        self.statuscode = int(fields.group(3))
+        # The size group is matched by \d* and may be empty; count that as 0
+        self.bytes = int(fields.group(4) or 0)
+        self.useragent = fields.group(5)
+
+class Httpd(Parser):
+    """
+    Parser which summarises the httpd access and error logs.
+    """
+
+    def __init__(self):
+        super().__init__()
+        self.name = "httpd"
+        self.info = "Analyse Apache (httpd) server logs, including data transferred, requests, clients, and errors."
+
+    def parse_log(self):
+        """
+        Build and return the "httpd" Section.
+
+        Reads the logs configured as "httpd-access" and "httpd-error" in the
+        "logs" section of the config. The returned section contains, in order:
+        total GET requests, total error log lines, bytes transferred, clients,
+        requested files and user agents.
+
+        Access log lines which contain "GET" but cannot be parsed are skipped
+        and reported in the log rather than aborting the whole section.
+        """
+
+        logger.debug("Starting httpd section")
+        section = Section("httpd")
+
+        accesslog = readlog(config.prefs.get("logs", "httpd-access"))
+
+        errorlog = readlog(config.prefs.get("logs", "httpd-error"))
+        total_errors = len(errorlog.splitlines())
+
+        logger.debug("Retrieved log data")
+
+        logger.debug("Searching through access log")
+
+        accesses = []
+        skipped = 0
+
+        for line in accesslog.splitlines():
+            if "GET" not in line:
+                continue
+            try:
+                accesses.append(AccessLine(line))
+            except (AttributeError, ValueError):
+                # The line mentions GET but does not fit the access log
+                # pattern (or has a malformed numeric field): ignore it
+                skipped += 1
+                logger.debug("Skipping unparseable access log line: " + line)
+
+        if skipped:
+            logger.warning("Skipped " + str(skipped) + " access log lines which could not be parsed")
+
+        total_requests = len(accesses)
+        # Same limit applies to every list below, so look it up once
+        maxlist = config.prefs.getint("logparse", "maxlist")
+
+        section.append_data(Data("Total of " + plural("request", total_requests)))
+        section.append_data(Data(plural("error", total_errors)))
+
+        size = Data()
+        size.subtitle = "Transferred " + parsesize(sum(ac.bytes for ac in accesses))
+        section.append_data(size)
+
+        clients = Data()
+        clients.items = [resolve(ac.client, config.prefs.get("httpd", "httpd-resolve-domains")) for ac in accesses]
+        clients.orderbyfreq()
+        # Subtitle counts are taken after orderbyfreq() and before truncl()
+        clients.subtitle = "Received requests from " + plural("client", len(clients.items))
+        clients.truncl(maxlist)
+        section.append_data(clients)
+
+        files = Data()
+        files.items = [ac.file for ac in accesses]
+        files.orderbyfreq()
+        files.subtitle = plural("file", len(files.items)) + " requested"
+        files.truncl(maxlist)
+        section.append_data(files)
+
+        useragents = Data()
+        useragents.items = [ac.useragent for ac in accesses]
+        useragents.orderbyfreq()
+        useragents.subtitle = plural("user agent", len(useragents.items))
+        useragents.truncl(maxlist)
+        section.append_data(useragents)
+
+        logger.info("httpd has received " + str(total_requests) + " requests with " + str(total_errors) + " errors")
+
+        logger.info("Finished httpd section")
+        return section