518af7adac3daa52418048a5f5dcf0c960c44f52
   1#
   2#   httpd.py
   3#   
   4#   Analyse Apache (httpd) server logs, including data transferred, requests,
   5#   clients, and errors. Note that Apache's logs can get filled up very quickly
   6#   with the default verbosity, leading to logparse taking a very long time to
   7#   analyse them. In general the default verbosity is good, but logs should be
   8#   cleared as soon as they are analysed (make sure 'rotate' is set to 'y'). 
   9#
  10
  11import re
  12
  13from ..formatting import *
  14from ..util import readlog, resolve
  15from .. import config
  16
  17import logging
  18logger = logging.getLogger(__name__)
  19
  20ACCESS_REGEX = "^\s*(\S+).*\"GET (\S+) HTTP(?:\/\d\.\d)?\" (\d{3}) (\d*) \".+\" \"(.*)\""
  21
  22class AccessLine(object):
  23
  24    def __init__(self, line):
  25        self.line = line
  26        fields = re.search(ACCESS_REGEX, line)
  27        
  28        self.client = fields.group(1)
  29        self.file = fields.group(2)
  30        self.statuscode = int(fields.group(3))
  31        self.bytes = int(fields.group(4))
  32        self.useragent = fields.group(5)
  33
  34def parse_log():
  35
  36    logger.debug("Starting httpd section")
  37    section = Section("httpd")
  38
  39    accesslog = readlog(config.prefs['logs']['httpd'] + '/access.log')
  40
  41    errorlog = readlog(config.prefs['logs']['httpd'] + '/error.log')
  42    total_errors = len(errorlog.splitlines())
  43
  44    logger.debug("Retrieved log data")
  45
  46    errors = []
  47    notfound = []
  48    unprivileged = []
  49
  50    logger.debug("Searching through access log")
  51
  52    accesses = []
  53
  54    for line in accesslog.splitlines():
  55        if "GET" in line:
  56            accesses.append(AccessLine(line))
  57
  58    total_requests = len(accesses)
  59    
  60    section.append_data(Data("Total of " + plural("request", total_requests)))
  61    section.append_data(Data(plural("error", total_errors)))
  62
  63    size = Data()
  64    size.subtitle = "Transferred " + parsesize(sum([ac.bytes for ac in accesses]))
  65    section.append_data(size)
  66
  67    clients = Data()
  68    clients.items = [resolve(ac.client, "fqdn") for ac in accesses]
  69    clients.orderbyfreq()
  70    clients.subtitle = "Received requests from " + plural("client", len(clients.items))
  71    clients.truncl(config.prefs['maxlist'])
  72    section.append_data(clients)
  73
  74    files = Data()
  75    files.items = [ac.file for ac in accesses]
  76    files.orderbyfreq()
  77    files.subtitle = plural("file", len(files.items)) + " requested"
  78    files.truncl(config.prefs['maxlist'])
  79    section.append_data(files)
  80
  81    useragents = Data()
  82    useragents.items = [ac.useragent for ac in accesses]
  83    useragents.orderbyfreq()
  84    useragents.subtitle = plural("user agent", len(useragents.items))
  85    useragents.truncl(config.prefs['maxlist'])
  86    section.append_data(useragents)
  87
  88    logger.info("httpd has received " + str(total_requests) + " requests with " + str(total_errors) + " errors")
  89
  90
  91    logger.info("Finished httpd section")
  92    return section