-#
-# httpd.py
-#
-# Analyse Apache (httpd) server logs, including data transferred, requests,
-# clients, and errors. Note that Apache's logs can get filled up very quickly
-# with the default verbosity, leading to logparse taking a very long time to
-# analyse them. In general the default verbosity is good, but logs should be
-# cleared as soon as they are analysed (make sure 'rotate' is set to 'y').
-#
+# -*- coding: utf-8 -*-
+"""
+Analyse Apache (httpd) server logs, including data transferred, requests,
+clients, user agents, and errors. Note that Apache's logs can get filled up
+very quickly with the default verbosity, leading to logparse taking a very
+long time to analyse them. In general the default verbosity is good, but logs
+should be cleared as soon as they are analysed (make sure 'rotate' is enabled
+in the logparse config).
+"""
+
+import datetime
import re
+import time
+
+from logparse.formatting import *
+from logparse.util import readlog, resolve
+from logparse import config
+from logparse.load_parsers import Parser
+
+# Building blocks for IP address matching. Raw strings keep the regex
+# backslashes intact without relying on Python leaving unknown escape
+# sequences untouched.
+IPv4_ADDR_REGEX = r'(?:\d{1,3}\.){3}\d{1,3}'
+IPv6_ADDR_REGEX = r"([0-9A-Fa-f]{0,4}:){2,7}([0-9A-Fa-f]{0,4})"
+IP_ADDR_REGEX = "(" + IPv4_ADDR_REGEX + "|" + IPv6_ADDR_REGEX + ")"
+
+# Maps Apache LogFormat directives to the named regex group that captures
+# their value. The keys are themselves regex fragments (convert_logformat
+# joins them with '|'), so the literal "%{Referer}i" and "%{User-Agent}i"
+# entries must stay ahead of the generic "%{...}i" entry to win the
+# alternation.
+LOG_VARS = {
+    "%a": "(?P<client>{})?".format(IP_ADDR_REGEX),  # client IP (v4 or v6)
+    "%A": "(?P<peer>{})?".format(IP_ADDR_REGEX),  # local (peer) IP
+    "%B": r"(?P<bytes>(\d+|-))",  # bytes
+    # CLF byte counts are written as a bare "-" when nothing was sent; the
+    # quoted form is still accepted for formats that wrap the field in quotes
+    "%b": r'(?P<clfbytes>(\d+|-|"-"))',  # bytes (CLF format)
+    "%{[^}]+?}C": "(?P<cookie>.*)",  # contents of cookie
+    "%D": r"(?P<serveus>-?\d+)",  # time taken to serve request (μs)
+    "%{[^}]+?}e": "(?P<envvar>.*)",  # environment variable contents
+    "%f": "(?P<file>.*)",  # file name requested
+    "%h": r"(?P<hostname>\S+)",  # remote hostname or IP
+    "%H": "(?P<protocol>.*)",  # request protocol
+    "%{Referer}i": "(?P<referer>.*)",  # referrer
+    "%{User-Agent}i": "(?P<useragent>.*)",  # user agent string
+    "%{[^}]+?}i": "(?P<header>.*)",  # request header
+    "%k": r"(?P<keepalive>\d*)",  # number of keepalive requests
+    "%l": "(?P<logname>.*)",  # remote logname
+    "%m": "(?P<method>.*)",  # request method
+    "%{[^}]+?}n": "(?P<note>.*)",  # notes
+    "%{[^}]+?}o": "(?P<replyheader>.*)",  # reply header
+    "%p": r"(?P<cport>\d*)",  # canonical port on server
+    "%{[^}]+?}p": r"(?P<port>\d*)",  # optional port
+    "%P": r"(?P<pid>\d*)",  # process ID of child
+    "%{[^}]+?}P": "(?P<thread>.*)",  # process or thread ID
+    "%q": "(?P<query>.*)",  # query string
+    "%r": "(?P<requesthead>.*)",  # first line of request
+    "%R": "(?P<handler>.*)",  # handler generating response
+    "%s": r"(?P<status>(\d+?|-))",  # status code
+    "%t": r"\[(?P<date>.*?)\]",  # request date and time with offset
+    "%{[^}]+?}t": r"(?P<fdate>\d+)",  # request date and time (custom format)
+    "%T": r"(?P<serves>\d+)",  # time taken to serve request (seconds)
+    "%{[^}]+?}T": r"(?P<servec>\d+)",  # time taken to serve request (custom format)
+    "%u": "(?P<user>.*)",  # remote user if authenticated
+    "%U": "(?P<url>.*)",  # URL path excluding query string
+    "%v": "(?P<servername>.*)",  # server name
+    "%V": "(?P<servernamec>.*)",  # server name (custom format)
+    "%X": "(?P<responsestatus>.?)",  # status on response completion
+    "%I": r"(?P<bytesreceived>\d+)",  # bytes received
+    "%O": r"(?P<bytessent>\d+)",  # bytes sent
+    "%S": r"(?P<bytestransferred>\d+)?",  # total bytes transferred
+}
+
+# Text rewritten in the LogFormat string before directives are expanded.
+LOG_ESCAPES = {
+    ">": "",  # final value
+    "<": "",  # initial value
+    "%%": "%",  # percent escape
+}
+
+def convert_logformat(format_template):
+    """
+    Convert an Apache LogFormat string to a compiled regex pattern.
+
+    The modifiers listed in LOG_ESCAPES are rewritten first, then every
+    directive matching a key of LOG_VARS is replaced by the corresponding
+    named capture group. Returns the compiled pattern; `re.error` propagates
+    if the resulting expression is not a valid regex.
+    """
+    escape_pattern = re.compile('|'.join(LOG_ESCAPES.keys()))
+    format_template = escape_pattern.sub(
+        lambda x: LOG_ESCAPES[x.group()], format_template)
+
+    def expand(match):
+        """Return the capture group for the directive that was matched."""
+        token = match.group()
+        if token in LOG_VARS:
+            return LOG_VARS[token]
+        # Parameterised directives such as "%{X-Forwarded-For}i" are matched
+        # by a regex key ("%{[^}]+?}i"), so the matched text is not itself a
+        # dictionary key; find the key whose pattern covers the whole token.
+        for key, group in LOG_VARS.items():
+            if re.fullmatch(key, token):
+                return group
+        return token
+
+    var_pattern = re.compile('|'.join(LOG_VARS.keys()))
+    return re.compile(var_pattern.sub(expand, format_template))
+
+
+class AccessLine(object):
+    """
+    Retrieves information from a line of the httpd access log.
+
+    Every named group of the supplied pattern becomes an attribute of the
+    instance; the raw captured strings remain available in `properties`.
+    """
+
+    def __init__(self, record, datefmt, pattern):
+        """
+        Assign attributes and verify/cast those that require it. Note that the
+        `pattern` argument must be a pre-compiled regex object (to save time).
+
+        Raises ValueError if `record` does not match `pattern` (or if the
+        captured date does not follow `datefmt`), and KeyError if the pattern
+        has no `date` group.
+        """
+
+        # Parse from a raw logfile string
+        match = pattern.search(record)
+        if match is None:
+            raise ValueError("record does not match the access log format")
+        self.properties = match.groupdict()
+        for field, value in self.properties.items():
+            # Empty captures and the "-" placeholder both mean "no value"
+            if value and value not in ("-", "\"-\""):
+                setattr(self, field, value)
+            else:
+                setattr(self, field, None)
+
+        # Verify data transfer metrics: numeric or zero, never None
+        for field, value in self.properties.items():
+            if "bytes" not in field:
+                continue
+            if isinstance(value, str) and value.isdigit():
+                setattr(self, field, int(value))
+            else:
+                setattr(self, field, 0)
+
+        # Verify date
+        self.date = datetime.datetime.strptime(self.properties["date"], datefmt)
+
+        # Verify client: fall back to the remote hostname
+        if not getattr(self, "client", None) and getattr(self, "hostname", None):
+            self.client = self.hostname
+
+        # Verify file: fall back to the target in the request's first line
+        if not getattr(self, "file", None) and hasattr(self, "requesthead"):
+            try:
+                self.file = re.search(
+                    r"^\w+\s(.*)\s\S+$", self.requesthead).group(1)
+            except (AttributeError, TypeError):
+                # AttributeError: request line has an unexpected shape;
+                # TypeError: request line was absent (None)
+                self.file = ""
+
+    @staticmethod
+    def _matches(pattern, value):
+        """
+        Return True if `value` is empty/missing or fully matches `pattern`.
+        """
+        if not value:
+            return True
+        return re.fullmatch(pattern, value) is not None
+
+    def match_client(self, pattern):
+        """
+        Check if the client of this object matches against a regex string and
+        return a boolean result of this comparison. The hostname is used when
+        no client is set; with neither available the result is True.
+        """
+        client = getattr(self, "client", None) or getattr(self, "hostname", None)
+        return self._matches(pattern, client)
+
+    def match_file(self, pattern):
+        """
+        Check if the target of this object matches against a regex string and
+        return a boolean result of this comparison (True if no file is set).
+        """
+        return self._matches(pattern, getattr(self, "file", None))
+
+    def match_ref(self, pattern):
+        """
+        Check if the referrer of this object matches against a regex string and
+        return a boolean result of this comparison (True if no referrer is set).
+        """
+        return self._matches(pattern, getattr(self, "referer", None))
+
+
+class Httpd(Parser):
+ """
+ Parser plugin which summarises the Apache (httpd) access and error logs.
+ """
+
+ def __init__(self):
+ super().__init__()
+ self.name = "httpd"
+ self.info = "Analyse Apache (httpd) server logs, including data " \
+ "transferred, requests, clients, and errors."
+
+ def parse_log(self):
+ """
+ Build and return the "httpd" section.
+
+ The datetime format is read from the `httpd` config section, falling
+ back to the `logparse` section; if neither is set an error is logged
+ and None is returned. Access log lines containing "GET" are parsed
+ with the pattern compiled from the `access-format` option; lines which
+ fail to parse are logged as warnings and skipped, as are lines outside
+ the section period or rejected by the `clients`, `files` or
+ `referrers` filters. The section receives the request and error
+ totals, the amount of data transferred, the requested files and the
+ user agents.
+ """
-from ..formatting import *
-from ..util import readlog, resolve
-from .. import config
-
-import logging
-logger = logging.getLogger(__name__)
-
-def parse_log():
- logger.debug("Starting httpd section")
- section = Section("httpd")
- accesslog = readlog(config.prefs['logs']['httpd'] + '/access.log')
- a = len(accesslog.split('\n'))
- errorlog = readlog(config.prefs['logs']['httpd'] + '/error.log')
- e = len(errorlog.split('\n'))
- data_b = 0
- ips = []
- files = []
- useragents = []
- errors = []
- notfound = []
- unprivileged = []
-
- logger.debug("Searching through access log")
- for line in accesslog.split('\n'):
- fields = re.search('^(\S*) .*GET (\/.*) HTTP/\d\.\d\" 200 (\d*) \"(.*)\".*\((.*)\;', line)
- try:
- ips.append(resolve(fields.group(1), fqdn=config.prefs['httpd']['resolve-domains']))
- files.append(fields.group(2))
- useragents.append(fields.group(5))
- data_b += int(fields.group(3))
- except Exception as error:
- if type(error) is AttributeError: # this line is not an access log
- pass
+ # NOTE(review): `logger` is not defined in the added lines and the old
+ # module-level definition is removed by this change; presumably it comes
+ # from the wildcard import of logparse.formatting - confirm
+ logger.debug("Starting httpd section")
+ section = Section("httpd")
+
+ # Prefer the httpd-specific datetime format, then the global one
+ datefmt = config.prefs.get("httpd", "datetime-format")
+ if not datefmt:
+ datefmt = config.prefs.get("logparse", "datetime-format")
+ if not datefmt:
+ logger.error("Invalid datetime-format configuration parameter")
+ return None
+
+ # Initialise patterns
+ logger.debug("Converting pattern from {0}".format(
+ config.prefs.get("httpd", "access-format")))
+ pattern = convert_logformat(config.prefs.get("httpd", "access-format"))
+ logger.debug("Compiled log format {0}".format(pattern))
+
+ logger.debug("Retrieving log data")
+
+ accesslog = readlog(config.prefs.get("logs", "httpd-access"))
+
+ errorlog= readlog(config.prefs.get("logs", "httpd-error"))
+ # Every line of the error log is counted as one error
+ total_errors = len(errorlog.splitlines())
+
+ logger.debug("Parsing access logs")
+
+ accesses = []
+
+ for line in accesslog.splitlines():
+ # Only lines containing "GET" are analysed; all others are skipped
+ if not "GET" in line:
+ continue
+ try:
+ ac_obj = AccessLine(line, datefmt, pattern)
+ except Exception as e:
+ logger.warning("Malformed access log: {0}. "
+ "{1}: {2}".format(line, type(e).__name__, e))
else:
- logger.warning("Error processing httpd access log: " + str(error))
- traceback.print_exc()
- data_h = parsesize(data_b)
-
- logger.info("httpd has transferred " + str(data_b) + " bytes in response to " + str(a) + " requests with " + str(e) + " errors")
- if (a > 0):
- logger.debug("Parsing request statistics (this might take a while)")
- request_data = Data()
- request_data.items = backticks(files)
- request_data.orderbyfreq()
- request_data.truncl(config.prefs['maxlist'])
- request_data.subtitle = plural(" request", a)
- section.append_data(request_data)
- if (ips != None):
- logger.debug("Parsing client statistics")
- client_data = Data()
- client_data.items = orderbyfreq(ips)
- client_data.subtitlte = plural(" client", str(len(ips)))
- client_data.truncl(config.prefs['maxlist'])
- section.append_data(client_data)
- if (useragents != None):
- logger.debug("Parsing user agent statistics")
- ua_data = Data()
- ua_data.items = orderbyfreq(useragents)
- n_ua = str(len(ua_data.items))
- ua_data.truncl(config.prefs['maxlist'])
- ua_data.subtitle = plural(" user agent", n_ua)
- section.append_data(client_data)
-
- section.append_data(Data(data_h + " transferred"))
- section.append_data(Data(plural(" error", e)))
-
- logger.info("Finished httpd section")
- return section
+ # Reached only when the line was parsed without raising: drop it if
+ # it falls outside the section period or fails a configured filter
+ if not section.period.compare(ac_obj.date):
+ continue
+
+ checks = [
+ ac_obj.match_client(
+ config.prefs.get("httpd", "clients")),
+ ac_obj.match_file(
+ config.prefs.get("httpd", "files")),
+ ac_obj.match_ref(
+ config.prefs.get("httpd", "referrers"))
+ ]
+ if not all(checks):
+ logger.debug("Ignoring access log due to config: " + line)
+ continue
+ accesses.append(ac_obj)
+
+ logger.debug("Processed {0} access logs".format(len(accesses)))
+
+ total_requests = len(accesses)
+
+ section.append_data(Data("Total of "
+ + plural("request", total_requests)))
+ section.append_data(Data(plural("error", total_errors)))
+
+ logger.debug("Parsing total size")
+
+ # NOTE(review): AccessLine only sets `bytessent` when the access format
+ # contains %O; with a format using %b/%B this lookup would raise
+ # AttributeError - confirm the expected access-format
+ size = Data()
+ size.subtitle = "Transferred " \
+ + parsesize(sum([ac.bytessent for ac in accesses]))
+ section.append_data(size)
+
+ logger.debug("Parsing clients")
+
+ # NOTE(review): client statistics are currently disabled (commented out)
+# clients = Data()
+# clients.items = [resolve(ac.hostname,
+# config.prefs.get("httpd", "httpd-resolve-domains"))
+# for ac in accesses]
+# clients.orderbyfreq()
+# clients.subtitle = "Received requests from " \
+# + plural("client", len(clients.items))
+# clients.truncl(config.prefs.getint("logparse", "maxlist"))
+# section.append_data(clients)
+
+ logger.debug("Parsing files")
+
+ files = Data()
+ files.items = [ac.file for ac in accesses if hasattr(ac, "file")]
+ files.orderbyfreq()
+ files.subtitle = plural("file", len(files.items)) + " requested"
+ files.truncl(config.prefs.getint("logparse", "maxlist"))
+ section.append_data(files)
+
+ logger.debug("Parsing user agents")
+
+ # NOTE(review): unlike `file` above, `useragent` is read without a
+ # hasattr guard; it is only set when the access format contains
+ # %{User-Agent}i - confirm
+ useragents = Data()
+ useragents.items = [ac.useragent for ac in accesses]
+ useragents.orderbyfreq()
+ useragents.subtitle = plural("user agent", len(useragents.items))
+ useragents.truncl(config.prefs.getint("logparse", "maxlist"))
+ section.append_data(useragents)
+
+ logger.info("httpd has received " + str(total_requests)
+ + " requests with " + str(total_errors) + " errors")
+
+ logger.info("Finished httpd section")
+ return section