add journald communication capability
[logparse.git] / logparse / parsers / httpd.py
index d0e800c896f75a451768ff809657d2e454939842..518af7adac3daa52418048a5f5dcf0c960c44f52 100644 (file)
@@ -17,66 +17,76 @@ from .. import config
 import logging
 logger = logging.getLogger(__name__)
 
+# Matches a GET entry of an httpd access log. Capture groups, in order:
+# client, requested path, status code, response size, and the contents of
+# the last quoted field (used as the user agent).
+# Written as a raw string so the regex escapes reach `re` untouched.
+ACCESS_REGEX = r"^\s*(\S+).*\"GET (\S+) HTTP(?:\/\d\.\d)?\" (\d{3}) (\d*) \".+\" \"(.*)\""
+
+class AccessLine(object):
+    """
+    One GET request parsed from an httpd access log line.
+
+    Attributes set by the constructor:
+        line        the raw log line as given
+        client      first run of non-whitespace characters on the line
+        file        the request target following "GET "
+        statuscode  three-digit response status, as an int
+        bytes       response size, as an int (0 when the field is empty)
+        useragent   text captured by the last quoted group of ACCESS_REGEX
+    """
+
+    def __init__(self, line):
+        """
+        Split `line` into its fields using ACCESS_REGEX.
+
+        Raises AttributeError if `line` does not match ACCESS_REGEX, because
+        re.search() then returns None.
+        """
+        self.line = line
+        fields = re.search(ACCESS_REGEX, line)
+
+        self.client = fields.group(1)
+        self.file = fields.group(2)
+        self.statuscode = int(fields.group(3))
+        # ACCESS_REGEX captures the size with (\d*), which may match an empty
+        # string; int("") would raise ValueError, so count that as 0 bytes
+        self.bytes = int(fields.group(4) or 0)
+        self.useragent = fields.group(5)
+
 def parse_log():
+
     logger.debug("Starting httpd section")
     section = Section("httpd")
+
     accesslog = readlog(config.prefs['logs']['httpd'] + '/access.log')
-    a = len(accesslog.split('\n'))
+
     errorlog = readlog(config.prefs['logs']['httpd'] + '/error.log')
-    e = len(errorlog.split('\n'))
-    data_b = 0
-    ips = []
-    files = []
-    useragents = []
+    total_errors = len(errorlog.splitlines())
+
+    logger.debug("Retrieved log data")
+
     errors = []
     notfound = []
     unprivileged = []
 
     logger.debug("Searching through access log")
-    for line in accesslog.split('\n'):
-        fields = re.search('^(\S*) .*GET (\/.*) HTTP/\d\.\d\" 200 (\d*) \"(.*)\".*\((.*)\;', line)
-        try:
-            ips.append(resolve(fields.group(1), fqdn=config.prefs['httpd']['resolve-domains']))
-            files.append(fields.group(2))
-            useragents.append(fields.group(5))
-            data_b += int(fields.group(3))
-        except Exception as error:
-            if type(error) is AttributeError: # this line is not an access log
-                pass
-            else:
-                logger.warning("Error processing httpd access log: " + str(error))
-                traceback.print_exc()
-    data_h = parsesize(data_b)
-
-    logger.info("httpd has transferred " + str(data_b) + " bytes in response to " + str(a) + " requests with " + str(e) + " errors")
-    if (a > 0):
-        logger.debug("Parsing request statistics (this might take a while)")
-        request_data = Data()
-        request_data.items = backticks(files)
-        request_data.orderbyfreq()
-        request_data.truncl(config.prefs['maxlist'])
-        request_data.subtitle = plural(" request", a)
-        section.append_data(request_data)
-    if (ips != None):
-        logger.debug("Parsing client statistics")
-        client_data = Data()
-        client_data.items = ips
-        client_data.orderbyfreq()
-        client_data.subtitle = plural(" client", str(len(ips)))
-        client_data.truncl(config.prefs['maxlist'])
-        section.append_data(client_data)
-    if (useragents != None):
-        logger.debug("Parsing user agent statistics")
-        ua_data = Data()
-        ua_data.items = useragents
-        ua_data.orderbyfreq()
-        n_ua = str(len(ua_data.items))
-        ua_data.truncl(config.prefs['maxlist'])
-        ua_data.subtitle = plural(" user agent", n_ua)
-        section.append_data(ua_data)
-
-    section.append_data(Data(data_h + " transferred"))
-    section.append_data(Data(plural(" error", e)))
+
+    accesses = []
+
+    for line in accesslog.splitlines():
+        if "GET" in line:
+            accesses.append(AccessLine(line))
+
+    total_requests = len(accesses)
+    
+    section.append_data(Data("Total of " + plural("request", total_requests)))
+    section.append_data(Data(plural("error", total_errors)))
+
+    size = Data()
+    size.subtitle = "Transferred " + parsesize(sum([ac.bytes for ac in accesses]))
+    section.append_data(size)
+
+    clients = Data()
+    clients.items = [resolve(ac.client, "fqdn") for ac in accesses]
+    clients.orderbyfreq()
+    clients.subtitle = "Received requests from " + plural("client", len(clients.items))
+    clients.truncl(config.prefs['maxlist'])
+    section.append_data(clients)
+
+    files = Data()
+    files.items = [ac.file for ac in accesses]
+    files.orderbyfreq()
+    files.subtitle = plural("file", len(files.items)) + " requested"
+    files.truncl(config.prefs['maxlist'])
+    section.append_data(files)
+
+    useragents = Data()
+    useragents.items = [ac.useragent for ac in accesses]
+    useragents.orderbyfreq()
+    useragents.subtitle = plural("user agent", len(useragents.items))
+    useragents.truncl(config.prefs['maxlist'])
+    section.append_data(useragents)
+
+    logger.info("httpd has received " + str(total_requests) + " requests with " + str(total_errors) + " errors")
+
 
     logger.info("Finished httpd section")
     return section