logparse / parsers / httpd.py on commit rework parser loading interface (c9a3c26)
   1#
   2#   httpd.py
   3#   
   4#   Analyse Apache (httpd) server logs, including data transferred, requests,
   5#   clients, and errors. Note that Apache's logs can get filled up very quickly
   6#   with the default verbosity, leading to logparse taking a very long time to
   7#   analyse them. In general the default verbosity is good, but logs should be
   8#   cleared as soon as they are analysed (make sure 'rotate' is set to 'y'). 
   9#
  10
  11import re
  12
  13from logparse.formatting import *
  14from logparse.util import readlog, resolve
  15from logparse import config
  16from logparse.load_parsers import Parser
  17
  18ACCESS_REGEX = "^\s*(\S+).*\"GET (\S+) HTTP(?:\/\d\.\d)?\" (\d{3}) (\d*) \".+\" \"(.*)\""
  19
  20class AccessLine(object):
  21
  22    def __init__(self, line):
  23        self.line = line
  24        fields = re.search(ACCESS_REGEX, line)
  25        
  26        self.client = fields.group(1)
  27        self.file = fields.group(2)
  28        self.statuscode = int(fields.group(3))
  29        self.bytes = int(fields.group(4))
  30        self.useragent = fields.group(5)
  31
  32class Httpd(Parser):
  33
  34    def __init__(self):
  35        super().__init__()
  36        self.name = "httpd"
  37        self.info = "Analyse Apache (httpd) server logs, including data transferred, requests, clients, and errors."
  38
  39    def parse_log(self):
  40
  41        logger.debug("Starting httpd section")
  42        section = Section("httpd")
  43
  44        accesslog = readlog(config.prefs.get("logs", "httpd-access"))
  45
  46        errorlog= readlog(config.prefs.get("logs", "httpd-error"))
  47        total_errors = len(errorlog.splitlines())
  48
  49        logger.debug("Retrieved log data")
  50
  51        logger.debug("Searching through access log")
  52
  53        accesses = []
  54
  55        for line in accesslog.splitlines():
  56            if "GET" in line:
  57                accesses.append(AccessLine(line))
  58
  59        total_requests = len(accesses)
  60        
  61        section.append_data(Data("Total of " + plural("request", total_requests)))
  62        section.append_data(Data(plural("error", total_errors)))
  63
  64        size = Data()
  65        size.subtitle = "Transferred " + parsesize(sum([ac.bytes for ac in accesses]))
  66        section.append_data(size)
  67
  68        clients = Data()
  69        clients.items = [resolve(ac.client, config.prefs.get("httpd", "httpd-resolve-domains")) for ac in accesses]
  70        clients.orderbyfreq()
  71        clients.subtitle = "Received requests from " + plural("client", len(clients.items))
  72        clients.truncl(config.prefs.getint("logparse", "maxlist"))
  73        section.append_data(clients)
  74
  75        files = Data()
  76        files.items = [ac.file for ac in accesses]
  77        files.orderbyfreq()
  78        files.subtitle = plural("file", len(files.items)) + " requested"
  79        files.truncl(config.prefs.getint("logparse", "maxlist"))
  80        section.append_data(files)
  81
  82        useragents = Data()
  83        useragents.items = [ac.useragent for ac in accesses]
  84        useragents.orderbyfreq()
  85        useragents.subtitle = plural("user agent", len(useragents.items))
  86        useragents.truncl(config.prefs.getint("logparse", "maxlist"))
  87        section.append_data(useragents)
  88
  89        logger.info("httpd has received " + str(total_requests) + " requests with " + str(total_errors) + " errors")
  90
  91
  92        logger.info("Finished httpd section")
  93        return section