# -*- coding: utf-8 -*-

"""
Analyse Apache (httpd) server logs, including data transferred, requests,
clients, user agents, and errors. Note that Apache's logs can get filled up
very quickly with the default verbosity, leading to logparse taking a very
long time to analyse them. In general the default verbosity is good, but logs
should be cleared as soon as they are analysed (make sure 'rotate' is enabled
in the logparse config).
"""

import datetime
import re
import time

from logparse.formatting import *
from logparse.util import readlog, resolve
from logparse import config
from logparse.load_parsers import Parser

# Raw strings are used for every pattern containing a backslash so that the
# regex escapes (\d, \S, \[ ...) are not interpreted as (invalid) string
# escapes by Python.
IPv4_ADDR_REGEX = r'(?:\d{1,3}\.){3}\d{1,3}'
IPv6_ADDR_REGEX = r"([0-9A-Fa-f]{0,4}:){2,7}([0-9A-Fa-f]{0,4})"
IP_ADDR_REGEX = "(" + IPv4_ADDR_REGEX + "|" + IPv6_ADDR_REGEX + ")"

# Maps Apache LogFormat directives to the regex fragment that captures them.
# Keys are themselves used as regexes when scanning a format template, so the
# parameterised directives (e.g. "%{[^}]+?}i") match any "%{name}i".
# The specific "%{Referer}i" / "%{User-Agent}i" entries must stay ahead of the
# generic "%{[^}]+?}i" entry because alternation is tried in insertion order.
LOG_VARS = {
        "%a": "(?P<client>{})?".format(IPv4_ADDR_REGEX),    # client IP
        "%A": "(?P<peer>{})?".format(IP_ADDR_REGEX),        # local (peer) IP
        "%B": r"(?P<bytes>(\d+|-))",                # bytes
        "%b": r'(?P<clfbytes>(\d+|"-"))',           # bytes (CLF format)
        "%{[^}]+?}C": "(?P<cookie>.*)",             # contents of cookie
        "%D": r"(?P<serveus>-?\d+)",                # time taken to serve request (μs)
        "%{[^}]+?}e": "(?P<envvar>.*)",             # environment variable contents
        "%f": "(?P<file>.*)",                       # file name requested
        "%h": r"(?P<hostname>\S+)",                 # remote hostname or IP
        "%H": "(?P<protocol>.*)",                   # request protocol
        "%{Referer}i": "(?P<referer>.*)",           # referrer
        "%{User-Agent}i": "(?P<useragent>.*)",      # user agent string
        "%{[^}]+?}i": "(?P<header>.*)",             # request header
        "%k": r"(?P<keepalive>\d*)",                # number of keepalive requests
        "%l": "(?P<logname>.*)",                    # remote logname
        "%m": "(?P<method>.*)",                     # request method
        "%{[^}]+?}n": "(?P<note>.*)",               # notes
        "%{[^}]+?}o": "(?P<replyheader>.*)",        # reply header
        "%p": r"(?P<cport>\d*)",                    # canonical port on server
        "%{[^}]+?}p": r"(?P<port>\d*)",             # optional port
        "%P": r"(?P<pid>\d*)",                      # process ID of child
        "%{[^}]+?}P": "(?P<thread>.*)",             # process or thread ID
        "%q": "(?P<query>.*)",                      # query string
        "%r": "(?P<requesthead>.*)",                # first line of request
        "%R": "(?P<handler>.*)",                    # handler generating response
        "%s": r"(?P<status>(\d+?|-))",              # status code
        "%t": r"\[(?P<date>.*?)\]",                 # request date and time with offset
        "%{[^}]+?}t": r"(?P<fdate>\d+)",            # request date and time (custom format)
        "%T": r"(?P<serves>\d+)",                   # time taken to serve request (seconds)
        "%{[^}]+?}T": r"(?P<servec>\d+)",           # time taken to serve request (custom format)
        "%u": "(?P<user>.*)",                       # remote user if authenticated
        "%U": "(?P<url>.*)",                        # URL path excluding query string
        "%v": "(?P<servername>.*)",                 # server name
        "%V": "(?P<servernamec>.*)",                # server name (custom format)
        "%X": "(?P<responsestatus>.?)",             # status on response completion
        "%I": r"(?P<bytesreceived>\d+)",            # bytes received
        "%O": r"(?P<bytessent>\d+)",                # bytes sent
        "%S": r"(?P<bytestransferred>\d+)?"         # total bytes transferred
}

# Modifier characters and escapes that are stripped/translated before the
# directives themselves are substituted.
LOG_ESCAPES = {
        ">": "",                                    # final value
        "<": "",                                    # initial value
        "%%": "%"                                   # percent escape
}

# Extracts the request target from a request line such as
# "GET /index.html HTTP/1.1" (method, target, protocol).
_REQUEST_TARGET_REGEX = re.compile(r"^\w+\s(.*)\s\S+$")


def _lookup_log_var(directive):
    """
    Return the regex fragment in LOG_VARS for a directive found in a
    LogFormat template. Literal keys are looked up directly; parameterised
    directives (e.g. "%{X-Forwarded-For}i") are resolved by full-matching the
    directive against each key, since the matched text is not itself a key.
    Raises KeyError if no key matches.
    """
    try:
        return LOG_VARS[directive]
    except KeyError:
        pass
    for key, fragment in LOG_VARS.items():
        if re.fullmatch(key, directive):
            return fragment
    raise KeyError(directive)


def convert_logformat(format_template):
    """
    Convert an Apache LogFormat string to a compiled regex pattern.

    The escapes in LOG_ESCAPES are applied first, then every directive in
    LOG_VARS is replaced by its named capture group. Text between directives
    is left untouched and is therefore interpreted as regex source. Note that
    a template using the same directive twice yields a duplicate group name,
    which `re.compile` rejects with `re.error`.
    """
    escape_pattern = re.compile('|'.join(LOG_ESCAPES.keys()))
    format_template = escape_pattern.sub(
            lambda m: LOG_ESCAPES[m.group()], format_template)
    var_pattern = re.compile('|'.join(LOG_VARS.keys()))
    format_template = var_pattern.sub(
            lambda m: _lookup_log_var(m.group()), format_template)
    return re.compile(format_template)


class AccessLine(object):
    """
    Retrieves information from a line of the httpd access log
    """

    def __init__(self, record, datefmt, pattern):
        """
        Assign attributes and verify/cast those that require it. Note that the
        `pattern` argument must be a pre-compiled regex object (to save time).

        Every named group of `pattern` becomes an attribute; empty values and
        the placeholders `-` / `"-"` become None. Raises ValueError if
        `record` does not match `pattern`, KeyError if the pattern has no
        `date` group, and whatever `strptime` raises for an unparseable date.
        """

        # Parse from a raw logfile string
        match = pattern.search(record)
        if match is None:
            raise ValueError("record does not match the access log format")
        self.properties = match.groupdict()
        for field, value in self.properties.items():
            if value and value not in ("-", "\"-\""):
                setattr(self, field, value)
            else:
                setattr(self, field, None)

        # Verify data transfer metrics: any field whose name contains "bytes"
        # is stored as an int, falling back to 0 for missing/non-numeric data
        for field, value in self.properties.items():
            if "bytes" not in field:
                continue
            if isinstance(value, str) and value.isdigit():
                setattr(self, field, int(value))
            else:
                setattr(self, field, 0)

        # Verify date
        self.date = datetime.datetime.strptime(
                self.properties["date"], datefmt)

        # Verify client (fall back to the remote hostname field)
        if not getattr(self, "client", None) \
                and getattr(self, "hostname", None):
            self.client = self.hostname

        # Verify file (fall back to the target in the request line)
        if not getattr(self, "file", None) and hasattr(self, "requesthead"):
            try:
                self.file = _REQUEST_TARGET_REGEX.search(
                        self.requesthead).group(1)
            except (AttributeError, TypeError):
                # AttributeError: request line did not match;
                # TypeError: request line was None (logged as "-")
                self.file = ""

    def match_client(self, pattern):
        """
        Check if the client of this object matches against a regex string and
        return a boolean result of this comparison. The hostname is used when
        no client is set; True is returned when neither is available.
        """
        if getattr(self, "client", None):
            return bool(re.fullmatch(pattern, self.client))
        if getattr(self, "hostname", None):
            return bool(re.fullmatch(pattern, self.hostname))
        return True

    def match_file(self, pattern):
        """
        Check if the target of this object matches against a regex string and
        return a boolean result of this comparison. True is returned when no
        file is set.
        """
        if getattr(self, "file", None):
            return bool(re.fullmatch(pattern, self.file))
        return True

    def match_ref(self, pattern):
        """
        Check if the referrer of this object matches against a regex string and
        return a boolean result of this comparison. True is returned when no
        referrer is set.
        """
        if getattr(self, "referer", None):
            return bool(re.fullmatch(pattern, self.referer))
        return True


class Httpd(Parser):
    """
    Parser producing the httpd section from the Apache access and error logs
    """

    def __init__(self):
        super().__init__()
        self.name = "httpd"
        self.info = "Analyse Apache (httpd) server logs, including data " \
                "transferred, requests, clients, and errors."

    def parse_log(self):
        """
        Read the configured access and error logs and build the httpd section:
        request and error totals, bytes sent, requested files and user agents.
        Returns the section, or None if no datetime format is configured.
        """

        logger.debug("Starting httpd section")
        section = Section("httpd")

        datefmt = config.prefs.get("httpd", "datetime-format")
        if not datefmt:
            datefmt = config.prefs.get("logparse", "datetime-format")
        if not datefmt:
            logger.error("Invalid datetime-format configuration parameter")
            return None

        # Initialise patterns
        access_format = config.prefs.get("httpd", "access-format")
        logger.debug("Converting pattern from {0}".format(access_format))
        pattern = convert_logformat(access_format)
        logger.debug("Compiled log format {0}".format(pattern))

        logger.debug("Retrieving log data")

        accesslog = readlog(config.prefs.get("logs", "httpd-access"))

        errorlog = readlog(config.prefs.get("logs", "httpd-error"))
        total_errors = len(errorlog.splitlines())

        logger.debug("Parsing access logs")

        # Filter patterns do not change between lines, so read them once
        client_filter = config.prefs.get("httpd", "clients")
        file_filter = config.prefs.get("httpd", "files")
        referrer_filter = config.prefs.get("httpd", "referrers")

        accesses = []

        for line in accesslog.splitlines():
            # Only lines containing "GET" are analysed
            if "GET" not in line:
                continue
            try:
                ac_obj = AccessLine(line, datefmt, pattern)
            except Exception as e:
                # Best effort: report the bad line and carry on with the rest
                logger.warning("Malformed access log: {0}. "
                        "{1}: {2}".format(line, type(e).__name__, e))
                continue

            # NOTE(review): presumably checks that the date falls within the
            # reporting period - confirm against Section.period
            if not section.period.compare(ac_obj.date):
                continue

            checks = [
                    ac_obj.match_client(client_filter),
                    ac_obj.match_file(file_filter),
                    ac_obj.match_ref(referrer_filter)
                    ]
            if not all(checks):
                logger.debug("Ignoring access log due to config: " + line)
                continue
            accesses.append(ac_obj)

        logger.debug("Processed {0} access logs".format(len(accesses)))

        total_requests = len(accesses)

        section.append_data(Data("Total of "
            + plural("request", total_requests)))
        section.append_data(Data(plural("error", total_errors)))

        logger.debug("Parsing total size")

        size = Data()
        # bytessent only exists when the access format includes %O
        size.subtitle = "Transferred " \
                + parsesize(sum(getattr(ac, "bytessent", 0)
                    for ac in accesses))
        section.append_data(size)

        logger.debug("Parsing clients")

        # NOTE(review): the clients listing below is disabled in the original
        # source; kept for reference until it is either restored or removed.
        # clients = Data()
        # clients.items = [resolve(ac.hostname,
        #     config.prefs.get("httpd", "httpd-resolve-domains"))
        #     for ac in accesses]
        # clients.orderbyfreq()
        # clients.subtitle = "Received requests from " \
        #         + plural("client", len(clients.items))
        # clients.truncl(config.prefs.getint("logparse", "maxlist"))
        # section.append_data(clients)

        logger.debug("Parsing files")

        files = Data()
        files.items = [ac.file for ac in accesses if hasattr(ac, "file")]
        files.orderbyfreq()
        files.subtitle = plural("file", len(files.items)) + " requested"
        files.truncl(config.prefs.getint("logparse", "maxlist"))
        section.append_data(files)

        logger.debug("Parsing user agents")

        useragents = Data()
        # useragent only exists when the access format includes
        # %{User-Agent}i
        useragents.items = [ac.useragent for ac in accesses
                if hasattr(ac, "useragent")]
        useragents.orderbyfreq()
        useragents.subtitle = plural("user agent", len(useragents.items))
        useragents.truncl(config.prefs.getint("logparse", "maxlist"))
        section.append_data(useragents)

        logger.info("httpd has received " + str(total_requests)
                + " requests with " + str(total_errors) + " errors")

        logger.info("Finished httpd section")
        return section