1#
2# httpd.py
3#
4# Analyse Apache (httpd) server logs, including data transferred, requests,
5# clients, and errors. Note that Apache's logs can get filled up very quickly
6# with the default verbosity, leading to logparse taking a very long time to
7# analyse them. In general the default verbosity is good, but logs should be
8# cleared as soon as they are analysed (make sure 'rotate' is set to 'y').
9#
10
11import re
12
13from ..formatting import *
14from ..util import readlog, resolve
15from .. import config
16
17import logging
18logger = logging.getLogger(__name__)
19
# Pattern for one GET entry in the access log. Capture groups:
#   1 client, 2 requested path, 3 status code,
#   4 response size (digits, or "-" which Apache's %b writes when no body
#     bytes were sent, e.g. for 304 responses), 5 user agent.
# Written as a raw string so the regex escapes are not interpreted (and
# warned about) as string escapes.
ACCESS_REGEX = r"^\s*(\S+).*\"GET (\S+) HTTP(?:\/\d\.\d)?\" (\d{3}) (\d+|-) \".+\" \"(.*)\""


class AccessLine(object):
    """One parsed GET entry of the access log.

    Attributes:
        line: the raw log line exactly as passed in.
        client: first whitespace-delimited token of the line.
        file: path given in the GET request.
        statuscode: three-digit status code, as an int.
        bytes: response size as an int; 0 when the log shows "-".
        useragent: contents of the last quoted field matched.
    """

    def __init__(self, line):
        """Parse *line* against ACCESS_REGEX and populate the attributes.

        Raises:
            ValueError: if *line* does not match ACCESS_REGEX.
        """
        self.line = line
        fields = re.search(ACCESS_REGEX, line)
        if fields is None:
            # Fail with a descriptive error instead of an AttributeError on
            # None further down.
            raise ValueError("unparseable access log line: " + repr(line))

        self.client = fields.group(1)
        self.file = fields.group(2)
        self.statuscode = int(fields.group(3))
        size = fields.group(4)
        # "-" means no body was sent; count it as zero bytes transferred.
        self.bytes = 0 if size == "-" else int(size)
        self.useragent = fields.group(5)
33
def parse_log():
    """Build the "httpd" report section from the access and error logs.

    Reads ``access.log`` and ``error.log`` from the directory named by
    ``config.prefs['logs']['httpd']``. Every access-log line containing
    "GET" is parsed into an AccessLine; lines that cannot be parsed are
    skipped and counted rather than aborting the whole section.

    The section receives, in order: the total request count, the error
    count (number of lines in the error log), the total bytes transferred,
    and lists of clients, requested files and user agents, each truncated
    to ``config.prefs['maxlist']``.

    Returns:
        The populated Section object.
    """
    logger.debug("Starting httpd section")
    section = Section("httpd")

    accesslog = readlog(config.prefs['logs']['httpd'] + '/access.log')

    errorlog = readlog(config.prefs['logs']['httpd'] + '/error.log')
    total_errors = len(errorlog.splitlines())

    logger.debug("Retrieved log data")

    logger.debug("Searching through access log")

    accesses = []
    skipped = 0
    for line in accesslog.splitlines():
        if "GET" not in line:
            continue
        try:
            accesses.append(AccessLine(line))
        except (AttributeError, ValueError):
            # AccessLine raises when a line contains "GET" but does not
            # match ACCESS_REGEX or has a non-numeric size field. One bad
            # line should not discard the entire report.
            skipped += 1
    if skipped:
        logger.warning("Skipped %d unparseable access log lines", skipped)

    total_requests = len(accesses)

    section.append_data(Data("Total of " + plural("request", total_requests)))
    section.append_data(Data(plural("error", total_errors)))

    size = Data()
    size.subtitle = "Transferred " + parsesize(sum(ac.bytes for ac in accesses))
    section.append_data(size)

    # Resolve each distinct client only once; busy logs repeat the same
    # addresses many times.
    resolved = {}
    for ac in accesses:
        if ac.client not in resolved:
            resolved[ac.client] = resolve(ac.client, "fqdn")

    clients = Data()
    clients.items = [resolved[ac.client] for ac in accesses]
    clients.orderbyfreq()
    # The count is taken after orderbyfreq() and before truncl(), as in the
    # original ordering of these calls.
    clients.subtitle = "Received requests from " + plural("client", len(clients.items))
    clients.truncl(config.prefs['maxlist'])
    section.append_data(clients)

    files = Data()
    files.items = [ac.file for ac in accesses]
    files.orderbyfreq()
    files.subtitle = plural("file", len(files.items)) + " requested"
    files.truncl(config.prefs['maxlist'])
    section.append_data(files)

    useragents = Data()
    useragents.items = [ac.useragent for ac in accesses]
    useragents.orderbyfreq()
    useragents.subtitle = plural("user agent", len(useragents.items))
    useragents.truncl(config.prefs['maxlist'])
    section.append_data(useragents)

    logger.info("httpd has received %d requests with %d errors",
                total_requests, total_errors)

    logger.info("Finished httpd section")
    return section