fix logrotate functionality
[logparse.git] / logparse / parsers / httpd.py
index 221f98224a85c5e70e82414478e9ac251507bd0a..518af7adac3daa52418048a5f5dcf0c960c44f52 100644 (file)
@@ -17,64 +17,76 @@ from .. import config
 import logging
 logger = logging.getLogger(__name__)
 
+ACCESS_REGEX = r'^\s*(\S+).*"GET (\S+) HTTP(?:\/\d\.\d)?" (\d{3}) (\d*|-) ".+" "(.*)"'
+
+class AccessLine(object):
+    """One GET access-log entry; AttributeError if line fails ACCESS_REGEX."""
+    def __init__(self, line):
+        self.line = line
+        fields = re.search(ACCESS_REGEX, line)
+        # Groups: client, path, status, size ("-" or empty = no body), agent.
+        self.client = fields.group(1)
+        self.file = fields.group(2)
+        self.statuscode = int(fields.group(3))
+        self.bytes = int(fields.group(4)) if fields.group(4).isdigit() else 0
+        self.useragent = fields.group(5)
+
 def parse_log():
-    output = ''
+
     logger.debug("Starting httpd section")
-    output += opentag('div', 1, 'httpd', 'section')
+    section = Section("httpd")
+
     accesslog = readlog(config.prefs['logs']['httpd'] + '/access.log')
-    a = len(accesslog.split('\n'))
+
     errorlog = readlog(config.prefs['logs']['httpd'] + '/error.log')
-    e = len(errorlog.split('\n'))
-    data_b = 0
-    ips = []
-    files = []
-    useragents = []
+    total_errors = len(errorlog.splitlines())
+
+    logger.debug("Retrieved log data")
+
     errors = []
     notfound = []
     unprivileged = []
 
     logger.debug("Searching through access log")
-    for line in accesslog.split('\n'):
-        fields = re.search('^(\S*) .*GET (\/.*) HTTP/\d\.\d\" 200 (\d*) \"(.*)\".*\((.*)\;', line)
-        try:
-            ips.append(resolve(fields.group(1), fqdn=config.prefs['httpd']['resolve-domains']))
-            files.append(fields.group(2))
-            useragents.append(fields.group(5))
-            data_b += int(fields.group(3))
-        except Exception as error:
-            if type(error) is AttributeError: # this line is not an access log
-                pass
-            else:
-                logger.warning("Error processing httpd access log: " + str(error))
-                traceback.print_exc()
-    data_h = parsesize(data_b)
-    output += writetitle("apache")
-
-    logger.info("httpd has transferred " + str(data_b) + " bytes in response to " + str(a) + " requests with " + str(e) + " errors")
-    if (a > 0):
-        logger.debug("Parsing request statistics (this might take a while)")
-        files = addtag(files, 'code')
-        files = orderbyfreq(files)
-        files = truncl(files, config.prefs['maxlist'])
-        output += writedata(plural(" request", a), files)
-    if (ips != None):
-        logger.debug("Parsing client statistics")
-        ips = addtag(ips, 'code')
-        ips = orderbyfreq(ips)
-        n_ip = str(len(ips))
-        ips = truncl(ips, config.prefs['maxlist'])
-        output += writedata(plural(" client", n_ip), ips)
-    if (useragents != None):
-        logger.debug("Parsing user agent statistics")
-        useragents = addtag(useragents, 'code')
-        useragents = orderbyfreq(useragents)
-        n_ua = str(len(useragents))
-        useragents = truncl(useragents, config.prefs['maxlist'])
-        output += writedata(plural(" device", n_ua), useragents)
-
-    output += writedata(data_h + " transferred")
-    output += writedata(plural(" error", e))
-
-    output += closetag('div', 1)
+
+    accesses = []
+
+    for line in accesslog.splitlines():
+        if "GET" in line:
+            accesses.append(AccessLine(line))
+
+    total_requests = len(accesses)
+    
+    section.append_data(Data("Total of " + plural("request", total_requests)))
+    section.append_data(Data(plural("error", total_errors)))
+
+    size = Data()
+    size.subtitle = "Transferred " + parsesize(sum([ac.bytes for ac in accesses]))
+    section.append_data(size)
+
+    clients = Data()
+    clients.items = [resolve(ac.client, "fqdn") for ac in accesses]
+    clients.orderbyfreq()
+    clients.subtitle = "Received requests from " + plural("client", len(clients.items))
+    clients.truncl(config.prefs['maxlist'])
+    section.append_data(clients)
+
+    files = Data()
+    files.items = [ac.file for ac in accesses]
+    files.orderbyfreq()
+    files.subtitle = plural("file", len(files.items)) + " requested"
+    files.truncl(config.prefs['maxlist'])
+    section.append_data(files)
+
+    useragents = Data()
+    useragents.items = [ac.useragent for ac in accesses]
+    useragents.orderbyfreq()
+    useragents.subtitle = plural("user agent", len(useragents.items))
+    useragents.truncl(config.prefs['maxlist'])
+    section.append_data(useragents)
+
+    logger.info("httpd has received " + str(total_requests) + " requests with " + str(total_errors) + " errors")
+
+
     logger.info("Finished httpd section")
-    return output
+    return section