From: Andrew Lorimer Date: Sat, 8 Jun 2019 12:11:11 +0000 (+1000) Subject: major rewrite: segregate into modules and upgrade to Python 3 X-Git-Url: https://git.lorimer.id.au/logparse.git/diff_plain/4944c2236eb366988452d5e54164d3023c87abca?ds=sidebyside major rewrite: segregate into modules and upgrade to Python 3 --- diff --git a/README.md b/README.md index f4416b8..e5a293b 100644 --- a/README.md +++ b/README.md @@ -22,3 +22,9 @@ logparse is also integrated with systemd logrotate to optionally rotate logs onl Configuration is through the file `/etc/logparse.conf`, in yaml format. Note that logparse must be run as root (preferably as a cron job). This is required for temperature reading and for writing to `/var/log/`. + +### Planned features: + +- output to standalone HTML or plaintext file +- email attached PDF +- specify user to send mail from diff --git a/header.html b/header.html index 9480535..53e4ea2 100755 --- a/header.html +++ b/header.html @@ -1,8 +1,8 @@ - + $title$ $version$ on $hostname$ ($date$) - + diff --git a/logparse.py b/logparse.py deleted file mode 100755 index 19dba19..0000000 --- a/logparse.py +++ /dev/null @@ -1,863 +0,0 @@ -#! /usr/bin/python - -import argparse, logging, os, shutil, re, subprocess, sys, requests, glob, socket, sensors, datetime, time, operator, premailer, locale -from sys import stdin -from collections import namedtuple, defaultdict -from shutil import copyfile -import yaml -import ast -import logging.handlers -import types -import traceback # debugging only - -reload(sys) -sys.setdefaultencoding('utf-8') # force utf-8 because anything else should die - -locale.setlocale(locale.LC_ALL, '') # inherit system locale - -scriptdir = os.path.dirname(os.path.realpath(__file__)) - - -diskstat = namedtuple('diskstat', ['cap', 'alloc', 'free', 'ratio']) -drivetemp = namedtuple('drivetemp', ['path', 'name', 'temp', 'units']) -config = { - 'output': '~/var/www/logparse/summary.html', - 'header': scriptdir + '/header.html', - 'css': scriptdir + '/main.css', - 'title': 'logparse', - 'maxlist': 10, - 'maxcmd': 3, - 'resolve-domains': 'fqdn', - 'mail': { - 'to': '', - 'from': '', - 'subject': 'logparse from $hostname$' - }, - 'rotate': 'y', - 'hddtemp': { - 'drives': ['/dev/sda'], - 'port': 7634, - 'show-model': False, - }, - 'apache': { - 'resolve-domains': '', - }, - 'sshd': { - 'resolve-domains': '', - }, - 'smbd': { - 'resolve-domains': '', - }, - 'httpd': { - 'resolve-domains': '', - }, - 'du': { - 'paths': ['/', '/etc', '/home'], - 'force-write': 'n', - }, - 'hostname-path': '/etc/hostname', - 'logs': { - 'auth': '/var/log/auth.log', - 'cron': '/var/log/cron.log', - 'sys': '/var/log/syslog', - 'smb': '/var/log/samba', - 'zfs': '/var/log/zpool.log', - 'alloc': '/var/log/du.log', - 'postfix': '/var/log/mail.log', - 'httpd': '/var/log/apache2' - } -} - - -HTTPDSTATUS = "http://localhost/server-status" -MAILPATH = "/mnt/andrew/temp/logparse/mail.html" -MAILOUT = "" -HTMLOUT = "" -TXTOUT = "" -VERSION = "v0.1" -#DEG = u'\N{DEGREE SIGN}'.encode('utf-8') -DEG = "°".encode('unicode_escape') -CEL = "C" - -# Set up logging -logging.basicConfig(level=logging.DEBUG) -logger = logging.getLogger('logparse') -loghandler = logging.handlers.SysLogHandler(address = '/dev/log') -loghandler.setFormatter(logging.Formatter(fmt='logparse.py[' + str(os.getpid()) + ']: %(message)s')) -logger.addHandler(loghandler) - - -# Get arguments -parser = argparse.ArgumentParser(description='grab logs of some common services and send them by email') -parser.add_argument('-f', '--function', 
help='run a specified function with parameters (for debugging purposes',required=False) -parser.add_argument('-t','--to', help='mail recipient (\"to\" address)',required=False) - -def __main__(): - logger.info("Beginning log analysis at " + str(datenow) + ' ' + str(timenow)) - - loadconf(scriptdir + "/logparse.yaml") - - # check if user wants to test an isolated function - debugfunc = parser.parse_args().function - if debugfunc is not None: - logger.debug("executing a single function: " + debugfunc) - eval(debugfunc) - sys.exit() - - if not config['mail']['to']: - logger.info("no recipient address provided, outputting to stdout") - else: - logger.info("email will be sent to " + config['mail']['to']) - - global LOCALDOMAIN - LOCALDOMAIN = getlocaldomain() - - global pathfilter - global pathpattern - pathfilter = {"auth": config['logs']['auth'], "cron": config['logs']['cron'], "sys": config['logs']['sys'], "postfix": config['logs']['postfix'], "smb": config['logs']['smb'], "zfs": config['logs']['zfs'], "alloc": config['logs']['alloc'], "httpd": config['logs']['httpd'], "header": config['header']} - pathfilter = dict((re.escape(k), v) for k, v in pathfilter.iteritems()) - pathpattern = re.compile("|".join(pathfilter.keys())) - - global varfilter - global varpattern - varfilter = {"$title$": config['title'], "$date$": datenow, "$time$": timenow, "$hostname$": hostname(), "$version$": VERSION, "$css$": os.path.relpath(config['css'], os.path.dirname(config['output']))} - varfilter = dict((re.escape(k), v) for k, v in varfilter.iteritems()) - varpattern = re.compile("|".join(varfilter.keys())) - - global tempfile - tempfile = open(config['output'], 'w+') - tempfile.write(header(config['header'])) - opentag('div', 1, 'main') - sshd() - sudo() - cron() - nameget() - httpd() - smbd() - postfix() - zfs() - temp() - du() - for tag in ['div', 'body', 'html']: - closetag(tag, 1) - tempfile.close() - mailprep(config['output'], MAILPATH) - if (config['mail']['to']): - logger.debug("sending email") - ms = subject(config['mail']['subject']) - cmd = "/bin/cat " + MAILPATH + " | /usr/bin/mail --debug-level=10 -a 'Content-type: text/html' -s '" + ms + "' " + config['mail']['to'] - logger.debug(cmd) - subprocess.call(cmd, shell=True) - logger.info("sent email") - - -def writetitle(title): - if (title == '' or '\n' in title): - logger.error("invalid title") - return - logger.debug("writing title for " + title) - tag('h2', 0, title) - -def writedata(subtitle, data = None): # write title and data to tempfile - if (subtitle == ""): - loggger.warning("no subtitle provided.. skipping section") - return - - if (data == None or len(data) == 0): - logger.debug("no data provided.. 
just printing subtitle") - tag('p', 0, subtitle) - else: - logger.debug("received data " + str(data)) - subtitle += ':' - if (len(data) == 1): - tag('p', 0, subtitle + ' ' + data[0]) - else: - tag('p', 0, subtitle) - opentag('ul', 1) - for datum in data: - tag('li', 0, datum) - closetag('ul', 1) - -def opentag(tag, block = 0, id = None, cl = None): # write html opening tag - if (block == 1): - tempfile.write('\n') - tempfile.write('<' + tag) - if (id != None): - tempfile.write(" id='" + id + "'") - if (cl != None): - tempfile.write(" class='" + cl + "'") - tempfile.write('>') - if (block == 1): - tempfile.write('\n') - -def closetag(tag, block = 0): # write html closing tag - if (block == 0): - tempfile.write("") - else: - tempfile.write("\n\n") - -def tag(tag, block = 0, content = ""): # write html opening tag, content, and html closing tag - opentag(tag, block) - tempfile.write(content) - closetag(tag, block) - -def header(template): # return a parsed html header from file - try: - copyfile(config['css'], config['dest'] + '/' + os.path.basename(config['css'])) - logger.debug("copied main.css") - except Exception as e: - logger.warning("could not copy main.css - " + str(e)) - headercontent = open(template, 'r').read() - headercontent = varpattern.sub(lambda m: varfilter[re.escape(m.group(0))], headercontent) - return headercontent - -def subject(template): - r = varpattern.sub(lambda m: varfilter[re.escape(m.group(0))], template) - logger.debug("returning subject line " + r) - return r - -def hostname(): # get the hostname of current server - hnfile = open(config['hostname-path'], 'r') - hn = re.search('^(.*)\n*', hnfile.read()).group(1) - return hn - -def getlocaldomain(): # get the parent fqdn of current server - domain = socket.getfqdn().split('.', 1) # Note: if socket.fetfqdn() returns localhost, make sure the first entry in /etc/hosts contains the fqdn - if len(domain) != 2: - logger.warning('Could not get domain of this server, only hostname. 
Please consider updating /etc/hosts') - return '' - else: - return domain[-1] - -def resolve(ip, fqdn = 'host-only'): # try to resolve an ip to hostname - # Possible values for fqdn: - # fqdn show full hostname and domain - # fqdn-implicit show hostname and domain unless local - # host-only only show hostname - # ip never resolve anything - # resolve-domains defined in individual sections of the config take priority over global config - - if not fqdn: - fqdn = config['resolve-domains'] - - if fqdn == 'ip': - return(ip) - - try: - socket.inet_aton(ip) # succeeds if text contains ip - hn = socket.gethostbyaddr(ip)[0] # resolve ip to hostname - if fqdn == 'fqdn-implicit' and hn.split('.', 1)[1] == LOCALDOMAIN: - return(hn.split('.')[0]) - elif fqdn == 'fqdn' or fqdn == 'fqdn-implicit': - return(hn) - elif fqdn == 'host-only': - return(hn.split('.')[0]) - else: - logger.warning("invalid value for fqdn config") - return(hn) - except socket.herror: - # cannot resolve ip - logger.debug(ip + " cannot be found, might not exist anymore") - return(ip) - except (OSError, socket.error): # socket.error for Python 2 compatibility - # already a hostname - logger.debug(ip + " is already a hostname") - return(ip) - except Exception as err: - logger.warning("failed to resolve hostname for " + ip + ": " + str(err)) - return(ip) # return ip if no hostname exists - -def plural(noun, quantity): # return "1 noun" or "n nouns" - if (quantity == 1): - return(str(quantity) + " " + noun) - else: - return(str(quantity) + " " + noun + "s") - -def parsesize(num, suffix='B'): # return human-readable size from number of bytes - for unit in ['','Ki','Mi','Gi','Ti','Pi','Ei','Zi']: - if abs(num) < 1024.0: - return "%3.1f %s%s" % (num, unit, suffix) - num /= 1024.0 - return "%.1f%s%s" % (num, 'Yi', suffix) - -def readlog(path = None, mode = 'r'): # read file, substituting known paths - if (path == None): - logger.error("no path provided") - return - else: - path = pathpattern.sub(lambda m: pathfilter[re.escape(m.group(0))], path) - if (os.path.isfile(path) is False): - logger.error(path + " does not exist") - return '' - else: - return open(path, mode).read() - -def writelog(path = None, content = "", mode = 'w'): # read file, substituting known paths - if (path == None or content == None): - logger.error("invalid usage of writelog") - return - else: - path = pathpattern.sub(lambda m: pathfilter[re.escape(m.group(0))], path) - file = open(path, mode) - file.write(content) - file.close() - logger.debug("written to file " + path) - -def getusage(path): # Get disk usage statistics - disk = os.statvfs(path) - cap = float(disk.f_bsize*disk.f_blocks) # disk capacity - alloc = float(disk.f_bsize*(disk.f_blocks-disk.f_bfree)) # size of path - free = float(disk.f_bsize*disk.f_bfree) # free space on disk (blocks, not usable space) - ratio = alloc / cap * 100 # percentage used - return diskstat(cap, alloc, free, ratio) - -def orderbyfreq(l): # order a list by the frequency of its elements and remove duplicates - temp_l = l[:] - l = list(set(l)) - l = [[i, temp_l.count(i)] for i in l] # add count of each element - l.sort(key=lambda x:temp_l.count(x[0])) # sort by count - l = [i[0] + ' (' + str(i[1]) + ')' for i in l] # put element and count into string - l = l[::-1] # reverse - return l - -def addtag(l, tag): # add prefix and suffix tags to each item in a list - l2 = ['<' + tag + '>' + i + '' for i in l] - return l2 - -def truncl(input, limit): # truncate list - if (len(input) > limit): - more = str(len(input) - limit) - output = 
input[:limit] - output.append("+ " + more + " more") - return(output) - else: - return(input) - -def mailprep(inputpath, output, *stylesheet): - logger.debug("converting stylesheet to inline tags") - old = readlog(inputpath) - logger.debug(config['css']) - pm = premailer.Premailer(old, external_styles=config['css']) - MAILOUT = pm.transform() - logger.info("converted stylesheet to inline tags") - file = open(output, 'w') - file.write(MAILOUT) - file.close() - logger.info("written to temporary mail file") - - - -# -# -# - -def sshd(): - logger.debug("starting sshd section") - opentag('div', 1, 'sshd', 'section') - matches = re.findall('.*sshd.*Accepted publickey for .* from .*', readlog('auth')) # get all logins - users = [] # list of users with format [username, number of logins] for each item - data = [] - num = sum(1 for x in matches) # total number of logins - for match in matches: - entry = re.search('^.*publickey\sfor\s(\w*)\sfrom\s(\S*)', match) # [('user', 'ip')] - - user = entry.group(1) - ip = entry.group(2) - - userhost = user + '@' + resolve(ip, fqdn=config['sshd']['resolve-domains']) - exists = [i for i, item in enumerate(users) if re.search(userhost, item[0])] - if (exists == []): - users.append([userhost, 1]) - else: - users[exists[0]][1] += 1 - - writetitle('sshd') - subtitle = plural('login', num) + ' from' - if (len(users) == 1): # if only one user, do not display no of logins for this user - logger.debug("found " + str(len(matches)) + " ssh logins for user " + users[0][0]) - subtitle += ' ' + users[0][0] - writedata(subtitle) - else: - for user in users: - data.append(user[0] + ' (' + str(user[1]) + ')') - if len(data) > config['maxlist']: # if there are lots of users, truncate them - data.append('+ ' + str(len(users) - config['maxlist'] - 1) + " more") - break - logger.debug("found " + str(len(matches)) + " ssh logins for users " + str(data)) - writedata(subtitle, data) - closetag('div', 1) - logger.info("finished sshd section") - -# -# -# - -def sudo(): - logger.debug("starting sudo section") - opentag('div', 1, 'sudo', 'section') - umatches = re.findall('.*sudo:session\): session opened.*', readlog('auth')) - num = sum(1 for line in umatches) # total number of sessions - users = [] - data = [] - for match in umatches: - user = re.search('.*session opened for user root by (\S*)\(uid=.*\)', match).group(1) - exists = [i for i, item in enumerate(users) if re.search(user, item[0])] - if (exists == []): - users.append([user, 1]) - else: - users[exists[0]][1] += 1 - commands = [] - cmatches = re.findall('sudo:.*COMMAND\=(.*)', readlog('auth')) - for cmd in cmatches: - commands.append(cmd) -# logger.debug("found the following commands: " + str(commands)) - - writetitle("sudo") - subtitle = plural("sudo session", num) + " for" - if (len(users) == 1): - logger.debug("found " + str(num) + " sudo session(s) for user " + str(users[0])) - subtitle += ' ' + users[0][0] - writedata(subtitle) - else: - for user in users: - data.append(user[0] + ' (' + str(user[1]) + ')') - logger.debug("found " + str(num) + " sudo sessions for users " + str(data)) - writedata(subtitle, data) - if (len(commands) > 0): - commands = addtag(commands, 'code') - commands = orderbyfreq(commands) - commands = truncl(commands, config['maxcmd']) - writedata("top sudo commands", [c for c in commands]) - closetag('div', 1) - logger.info("finished sudo section") - -# -# -# - -def cron(): - logger.debug("starting cron section") - opentag('div', 1, 'cron', 'section') - matches = 
re.findall('.*CMD\s*\(\s*(?!.*cd)(.*)\)', readlog('cron')) - num = sum(1 for line in matches) - commands = [] - for match in matches: - commands.append(str(match)) - # commands.append([str(match)for match in matches]) - #logger.debug("found cron command " + str(commands)) - logger.info("found " + str(num) + " cron jobs") - subtitle = str(num) + " cron jobs run" - writetitle("cron") - writedata(subtitle) - if (matches > 0): - commands = addtag(commands, 'code') - commands = orderbyfreq(commands) - commands = truncl(commands, config['maxcmd']) - writedata("top cron commands", [c for c in commands]) - closetag('div', 1) - logger.info("finished cron section") - -# -# -# - -def nameget(): - logger.debug("starting nameget section") - opentag('div', 1, 'nameget', 'section') - logger.debug("reading syslog.. this may take a while") - syslog = readlog('sys') - failed = re.findall('.*nameget.*downloading of (.*) from .*failed.*', syslog) - n_f = sum(1 for i in failed) - l_f = [] - for i in failed: - l_f.append(i if i else '[no destination]') - logger.debug("the following downloads failed: " + str(l_f)) - succ = re.findall('.*nameget.*downloaded\s(.*)', syslog) - n_s = sum(1 for i in succ) - l_s = [] - for i in succ: - l_s.append(i) - logger.debug("the following downloads succeeded: " + str(l_f)) - logger.debug("found " + str(n_s) + " successful downloads, and " + str(n_f) + " failed attempts") - writetitle("nameget") - writedata(str(n_s) + " succeeded", truncl(l_s, config['maxlist'])) - writedata(str(n_f) + " failed", truncl(l_f, config['maxlist'])) - closetag('div', 1) - logger.info("finished nameget section") - -# -# -# - -def httpd(): - logger.info("starting httpd section") - opentag('div', 1, 'httpd', 'section') - accesslog = readlog("httpd/access.log") - a = len(accesslog.split('\n')) - errorlog = readlog("httpd/error.log") - e = len(errorlog.split('\n')) - data_b = 0 - ips = [] - files = [] - useragents = [] - errors = [] - notfound = [] - unprivileged = [] - - for line in accesslog.split('\n'): - fields = re.search('^(\S*) .*GET (\/.*) HTTP/\d\.\d\" 200 (\d*) \"(.*)\".*\((.*)\;', line) - try: - ips.append(resolve(fields.group(1), fqdn=config['httpd']['resolve-domains'])) - files.append(fields.group(2)) - useragents.append(fields.group(5)) - data_b += int(fields.group(3)) - except Exception as error: - if type(error) is AttributeError: # this line is not an access log - pass - else: - logger.warning("error processing httpd access log: " + str(error)) - traceback.print_exc() - logger.debug(str(data_b) + " bytes transferred") - data_h = parsesize(data_b) - writetitle("apache") - - logger.debug("httpd has transferred " + str(data_b) + " bytes in response to " + str(a) + " requests with " + str(e) + " errors") - if (a > 0): - files = addtag(files, 'code') - files = orderbyfreq(files) - files = truncl(files, config['maxlist']) - writedata(plural(" request", a), files) - if (ips != None): - ips = addtag(ips, 'code') - ips = orderbyfreq(ips) - n_ip = str(len(ips)) - ips = truncl(ips, config['maxlist']) - writedata(plural(" client", n_ip), ips) - if (useragents != None): - useragents = addtag(useragents, 'code') - useragents = orderbyfreq(useragents) - n_ua = str(len(useragents)) - useragents = truncl(useragents, config['maxlist']) - writedata(plural(" device", n_ua), useragents) - - writedata(data_h + " transferred") - writedata(plural(" error", e)) - - closetag('div', 1) - logger.info("finished httpd section") - -# -# -# - -def httpdsession(): - # logger.debug("starting httpd section") - 
opentag('div', 1, 'httpd', 'section') - httpdlog = requests.get(HTTPDSTATUS).content - uptime = re.search('.*uptime: (.*)<', httpdlog).group(1) - uptime = re.sub(' minute[s]', 'm', uptime) - uptime = re.sub(' second[s]', 's', uptime) - uptime = re.sub(' day[s]', 's', uptime) - uptime = re.sub(' month[s]', 'mo', uptime) - accesses = re.search('.*accesses: (.*) - .*', httpdlog).group(1) - traffic = re.search('.*Traffic: (.*)', httpdlog).group(1) - return("
httpd session: up " + uptime + ", " + accesses + " requests, " + traffic + " transferred") - closetag('div', 1) - # logger.info("finished httpd section") - -# -# -# - -def smbd(): - logger.debug("starting smbd section") - opentag('div', 1, 'smbd', 'section') - files = glob.glob(config['logs']['smb'] + "/log.*[!\.gz][!\.old]") # find list of logfiles - # for f in files: - - # file_mod_time = os.stat(f).st_mtime - - # Time in seconds since epoch for time, in which logfile can be unmodified. - # should_time = time.time() - (30 * 60) - - # Time in minutes since last modification of file - # last_time = (time.time() - file_mod_time) - # logger.debug(last_time) - - # if (file_mod_time - should_time) < args.time: - # print "CRITICAL: {} last modified {:.2f} minutes. Threshold set to 30 minutes".format(last_time, file, last_time) - # else: - - # if (datetime.timedelta(datetime.datetime.now() - datetime.fromtimestamp(os.path.getmtime(f))).days > 7): - # files.remove(f) - logger.debug("found log files " + str(files)) - n_auths = 0 # total number of logins from all users - sigma_auths = [] # contains users - output = "" - - for file in files: # one log file for each client - - logger.debug("looking at file " + file) - - # find the machine (ip or hostname) that this file represents - ip = re.search('log\.(.*)', file).group(1) # get ip or hostname from file path (/var/log/samba/log.host) - host = resolve(ip, fqdn=config['smbd']['resolve-domains']) - if (host == ip and (config['smbd']['resolve-domains'] or config['resolve-domains']) != 'ip'): # if ip has disappeared, fall back to a hostname from logfile - newhost = re.findall('.*\]\@\[(.*)\]', readlog(file)) - if (len(set(newhost)) == 1): # all hosts in one file should be the same - host = newhost[0].lower() - - # count number of logins from each user-host pair - matches = re.findall('.*(?:authentication for user \[|connect to service .* initially as user )(\S*)(?:\] .*succeeded| \()', readlog(file)) - for match in matches: - userhost = match + "@" + host - sigma_auths.append(userhost) - # exists = [i for i, item in enumerate(sigma_auths) if re.search(userhost, item[0])] - # if (exists == []): - # sigma_auths.append([userhost, 1]) - # else: - # sigma_auths[exists[0]][1] += 1 - n_auths += 1 - writetitle("samba") - subtitle = plural("login", n_auths) + " from" - if (len(sigma_auths) == 1): # if only one user, do not display no of logins for this user - subtitle += ' ' + sigma_auths[0][0] - writedata(subtitle) - else: # multiple users - sigma_auths = orderbyfreq(sigma_auths) - sigma_auths = truncl(sigma_auths, config['maxlist']) - logger.debug("found " + str(n_auths) + " samba logins for users " + str(sigma_auths)) - writedata(subtitle, sigma_auths) - closetag('div', 1) - logger.info("finished smbd section") - -# -# -# - -def postfix(): - logger.debug("starting postfix section") - opentag('div', 1, 'postfix', 'section') - messages = re.findall('.*from\=<(.*)>, size\=(\d*),.*\n.*to=<(.*)>', readlog('postfix')) - r = [] - s = [] - size = 0 - for message in messages: - r.append(message[2]) - s.append(message[0]) - size += int(message[1]) - # size = sum([int(x) for x in messages]) - size = parsesize(size) - n = str(len(messages)) - writetitle("postfix") - - if (len(r) > 0): - s = list(set(r)) # unique recipients - if (len(s) > 1): - r = orderbyfreq(r) - r = truncl(r, config['maxlist']) - writedata(n + " messages sent to", r) - else: - writedata(n + " messages sent to " + r[0]) - else: - writedata(n + " messages sent") - writedata("total of " + size) - 
closetag('div', 1) - logger.info("finished postfix section") - -# -# -# - -def zfs(): - logger.debug("starting zfs section") - opentag('div', 1, 'zfs', 'section') - zfslog = readlog('zfs') - pool = re.search('.*---\n(\w*)', zfslog).group(1) - scrub = re.search('.*scrub repaired (\d*).* in .*\d*h\d*m with (\d*) errors on (\S*\s)(\S*)\s(\d+\s)', zfslog) - iostat = re.search('.*---\n\w*\s*(\S*)\s*(\S*)\s', zfslog) - scrubrepairs = scruberrors = scrubdate = None - try: - scrubrepairs = scrub.group(1) - scruberrors = scrub.group(2) - scrubdate = scrub.group(3) + scrub.group(5) + scrub.group(4) - except Exception as e: - logger.debug("error getting scrub data: " + str(e)) - alloc = iostat.group(1) - free = iostat.group(2) - writetitle("zfs") - if (scrubdate != None): - subtitle = "Scrub of " + pool + " on " + scrubdate - data = [scrubrepairs + " repaired", scruberrors + " errors", alloc + " used", free + " free"] - else: - subtitle = pool - data = [alloc + " used", free + " free"] - writedata(subtitle, data) - closetag('div', 1) - logger.info("finished zfs section") - -# -# -# - -def temp(): - logger.debug("starting temp section") - opentag('div', 1, 'temp', 'section') - - # cpu temp - - sensors.init() - coretemps = [] - pkgtemp = 0 - systemp = 0 - try: - for chip in sensors.iter_detected_chips(): - for feature in chip: - if "Core" in feature.label: - coretemps.append([feature.label, feature.get_value()]) - logger.debug("found core " + feature.label + " at temp " + str(feature.get_value())) - if "CPUTIN" in feature.label: - pkgtemp = str(feature.get_value()) - logger.debug("found cpu package at temperature " + pkgtemp) - if "SYS" in feature.label: - systemp = feature.get_value() - logger.debug("found sys input " + feature.label + " at temp " + str(feature.get_value())) - core_avg = reduce(lambda x, y: x[1] + y[1], coretemps) / len(coretemps) - logger.debug("average cpu temp is " + str(core_avg)) - coretemps.append(["avg", str(core_avg)]) - coretemps.append(["pkg", pkgtemp]) - coretemps = [x[0] + ": " + str(x[1]) + DEG + CEL for x in coretemps] - finally: - sensors.cleanup() - - # drive temp - - # For this to work, `hddtemp` must be running in daemon mode. - # Start it like this (bash): sudo hddtemp -d /dev/sda /dev/sdX... 
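For reference, the hddtemp daemon queried by this code speaks a trivial TCP protocol: on connect it dumps one '|'-delimited record per drive and closes the connection. A minimal standalone client in the same spirit, assuming the daemon listens on localhost:7634 as in the config defaults above (the sample reply and drive models are hypothetical):

    import socket

    def read_hddtemp(host='localhost', port=7634):
        # hddtemp sends its full report as soon as the connection opens,
        # then closes it; read until EOF
        with socket.create_connection((host, port), timeout=10) as sock:
            data = b''
            while True:
                chunk = sock.recv(1024)
                if not chunk:
                    break
                data += chunk
        # hypothetical reply:
        # '|/dev/sda|ST2000DM001-1CH164|34|C||/dev/sdb|WDC WD20EFRX|33|C|'
        text = data.decode('ascii')
        return [rec.split('|') for rec in text.strip('|').split('||')]

Each record splits into [path, model, temperature, units], which is how the fields are unpacked in the code that follows.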
- - received = '' - sumtemp = 0.0 - data = "" - output = [] - - try: - hsock = socket.socket(socket.AF_INET, socket.SOCK_STREAM) - hsock.connect(("localhost", int(config['hddtemp']['port']))) - logger.debug("tcp socket on port " + str(int(config['hddtemp']['port'])) + " opened for `hddtemp` (ensure daemon is running)") - hsock.sendall('') # send dummy packet and shut write conn - hsock.shutdown(socket.SHUT_WR) - - while 1: - line = hsock.recv(1024) - if line == "": # exit on blank line - break - logger.debug("received line " + str(line)) - data += line - hsock.close() - logger.debug("closed connection, having received " + str(sys.getsizeof(data)) + " bytes") - - data = data.lstrip('|').rstrip('|') # remove leading & trailing `|` - drives = data.split('|' * 2) # split into drives - - for drive in drives: - fields = drive.split('|') - if fields[0] in config['hddtemp']['drives']: - output.append(fields[0] + (' (' + fields[1] + ')' if config['hddtemp']['show-model'] else '')+ ': ' + fields[2] + DEG + fields[3]) - sumtemp += int(fields[2]) - logger.debug("added drive " + fields[0]) - else: - logger.debug("ignoring drive " + fields[0]) - - hddavg = int(format(sumtemp/float(len(drives)))) + e + DEG + output[0][-1:] # use units of first drive (last character of output) - logger.debug("avg disk temp is " + str(hddavg)) - output.append("avg: " + str(hddavg)) - except Exception as ex: - logger.debug("failed getting hddtemps with error " + str(ex)) - finally: - hsock.close() - - writetitle("temperatures") - if (systemp != 0): - writedata("sys: " + str(systemp) + DEG) - if (coretemps != ''): - writedata("cores", coretemps) - if (config['hddtemp']['drives'] != ''): - writedata("disks", output) - - closetag('div', 1) - logger.info("finished temp section") - -# -# -# - -def du(): - logger.debug("starting du section") - opentag('div', 1, 'du', 'section') - out = [] - content = readlog('alloc') - contentnew = "" - for path in config['du']['paths']: - alloc_f = getusage(path).alloc - delta = None - try: - alloc_i = re.search(path + '\t(.*)\n', content).group(1) - delta = alloc_f - float(alloc_i) - except: - pass - if (delta == None): - out.append([path, "used " + parsesize(alloc_f)]) - else: - out.append([path, "used " + parsesize(alloc_f), "delta " + parsesize(delta)]) - contentnew += (path + '\t' + str(alloc_f) + '\n') - if config['rotate'] == 'y' or config['du']['force-write'] == 'y': - writelog('alloc', contentnew) - - writetitle("du") - logger.debug("disk usage data is " + str(out)) - for path in out: - writedata(path[0], [p for p in path[1:]]) - - closetag('div', 1) - logger.info("finished du section") - -# -# -# -starttime = datetime.datetime.now() -timenow = time.strftime("%H:%M:%S") -datenow = time.strftime("%x") - -def loadconf(configfile): - try: - data = yaml.safe_load(open(configfile)) - for value in data: - if(type(data[value]) == types.DictType): - for key in data[value].iteritems(): - config[value][key[0]] = key[1] - else: - config[value] = data[value] - config['dest'] = os.path.dirname(config['output']) - if parser.parse_args().to is not None: config['mail']['to'] = parser.parse_args().to - except Exception as e: - logger.warning("error processing config: " + str(e)) - - -try: - __main__() -finally: - # rotate logs using systemd logrotate - if parser.parse_args().function is None: - if (config['rotate'] == 'y'): - subprocess.call("/usr/sbin/logrotate -f /etc/logrotate.conf", shell=True) - logger.info("rotated logfiles") - else: - logger.debug("user doesn't want to rotate logs") - if 
(config['rotate'] == 's'): - logger.debug("Here is the output of `logrotate -d /etc/logrotate.conf` (simulated):") - sim = subprocess.check_output("/usr/sbin/logrotate -d /etc/logrotate.conf", shell=True) - logger.debug(sim) - - timenow = time.strftime("%H:%M:%S") - datenow = time.strftime("%x") - logger.info("finished parsing logs at " + datetime.datetime.now().strftime("%x %H:%M:%S") + " (" + str(datetime.datetime.now() - starttime) + ")") diff --git a/logparse.yaml b/logparse.yaml deleted file mode 100755 index a9dcaa8..0000000 --- a/logparse.yaml +++ /dev/null @@ -1,15 +0,0 @@ - -output: /mnt/andrew/temp/logparse/summary.html -mail: - to: andrew@lorimer.id.au -hddtemp: - drives: - - /dev/sda - - /dev/sdc - - /dev/sdd - - /dev/sde - port: 7634 -du-paths: - - /home/andrew - - /mnt/andrew -rotate: n \ No newline at end of file diff --git a/logparse/__init__.py b/logparse/__init__.py new file mode 100644 index 0000000..ce4b891 --- /dev/null +++ b/logparse/__init__.py @@ -0,0 +1,2 @@ +__version__ = '1.0' +__name__ = 'logparse' diff --git a/logparse/__main__.py b/logparse/__main__.py new file mode 100644 index 0000000..871cac3 --- /dev/null +++ b/logparse/__main__.py @@ -0,0 +1,8 @@ +# +# __main__.py +# +# Executed when the logparse directory is executed as a script + +from .interface import main +__version__ = '1.0' +main() diff --git a/logparse/config.py b/logparse/config.py new file mode 100644 index 0000000..ac8cc9f --- /dev/null +++ b/logparse/config.py @@ -0,0 +1,95 @@ +# +# config.py +# +# Default config values and basic wrapper for PyYaml. New config options +# should be added to the dictionary below, along with appropriate defaults. +# + +import yaml +import types +import os +from pkg_resources import Requirement, resource_filename + +import logparse +import logging +logger = logging.getLogger(__name__) + +def locate(filename): + logger.debug("Searching for {0}".format(filename)) + loc = resource_filename(Requirement.parse(__package__), filename) + logger.debug("Found {0}".format(loc)) + return loc + +prefs = { + 'output': '/var/www/logparse/summary.html', + 'header': '/etc/logparse/header.html', + 'css': '/etc/logparse/main.css', + 'title': logparse.__name__, + 'maxlist': 10, + 'maxcmd': 3, + 'resolve-domains': 'fqdn', + 'mail': { + 'to': '', + 'from': '', + 'subject': 'logparse from $hostname$', + 'mailbin': '/usr/bin/mail', + }, + 'rotate': 'n', + 'verbose': 'n', + 'hddtemp': { + 'drives': ['/dev/sda'], + 'host': 'localhost', + 'separator': '|', + 'timeout': 10, + 'port': 7634, + 'show-model': False, + }, + 'apache': { + 'resolve-domains': '', + }, + 'sshd': { + 'resolve-domains': '', + }, + 'smbd': { + 'resolve-domains': '', + }, + 'httpd': { + 'resolve-domains': '', + }, + 'du': { + 'paths': ['/', '/etc', '/home'], + 'force-write': 'n', + }, + 'hostname-path': '/etc/hostname', + 'logs': { + 'auth': '/var/log/auth.log', + 'cron': '/var/log/cron.log', + 'sys': '/var/log/syslog', + 'smb': '/var/log/samba', + 'zfs': '/var/log/zpool.log', + 'alloc': '/var/log/du.log', + 'postfix': '/var/log/mail.log', + 'httpd': '/var/log/apache2' + } +} + +def loadconf(argparser, configfile = "/etc/logparse/logparse.conf"): + logger.debug("getting config from {0}".format(configfile)) + try: + data = yaml.safe_load(open(configfile)) + for value in data: + if(isinstance(data[value], dict)): + for key in data[value].items(): + prefs[value][key[0]] = key[1] + else: + prefs[value] = data[value] +# config['dest'] = paths.dirname(config['output']) + if argparser.parse_args().to is not None: + 
prefs['mail']['to'] = argparser.parse_args().to + if not prefs['mail']['to']: + logger.info("no recipient address provided, outputting to stdout") + else: + logger.info("email will be sent to " + prefs['mail']['to']) + return prefs + except Exception as e: + logger.warning("error processing config: " + str(e)) diff --git a/logparse/formatting.py b/logparse/formatting.py new file mode 100644 index 0000000..5350bb6 --- /dev/null +++ b/logparse/formatting.py @@ -0,0 +1,141 @@ +# +# formatting.py +# +# This file contains global functions for formatting and printing data. This +# file should be imported into individual log-parsing scripts located in +# logs/*. Data is all formatted in HTML. Writing to disk and/or emailing data +# is left to __main__.py. +# + +import os +import re +import locale + +from .config import prefs +#import util +#import interface +import logparse +from . import interface, util + +import logging +logger = logging.getLogger(__name__) + +locale.setlocale(locale.LC_ALL, '') # inherit system locale +#DEG = "°".encode('unicode_escape') +DEG = u'\N{DEGREE SIGN}' +CEL = "C" +TIMEFMT = "%X" +DATEFMT = "%x" + +def init_varfilter(): + global varfilter + global varpattern + varfilter = {"$title$": prefs['title'], "$date$": interface.start.strftime(DATEFMT),"$time$": interface.start.strftime(TIMEFMT), "$hostname$": util.hostname(prefs['hostname-path']), "$version$": logparse.__version__, "$css$": os.path.relpath(prefs['css'], os.path.dirname(prefs['output']))} + varfilter = dict((re.escape(k), v) for k, v in varfilter.items()) + varpattern = re.compile("|".join(varfilter.keys())) + +def writetitle(title): # write title for a section + if (title == '' or '\n' in title): + logger.error("Invalid title") + raise ValueError + logger.debug("Writing title for " + title) + return tag('h2', 0, title) + +def opentag(tag, block = 0, id = None, cl = None): # write html opening tag + output = "" + if (block): + output += '\n' + output += '<' + tag + if (id != None): + output += " id='" + id + "'" + if (cl != None): + output += " class='" + cl + "'" + output += '>' + if (block): + output += '\n' + return output + +def closetag(tag, block = 0): # write html closing tag + if (block == 0): + return "</" + tag + ">" + else: + return "\n</" + tag + ">\n" + +def tag(tag, block = 0, content = ""): # write html opening tag, content, and html closing tag + o = opentag(tag, block) + c = closetag(tag, block) + return o + content + c + +def header(template): # return a parsed html header from file +# try: +# copyfile(config['css'], config['dest'] + '/' + os.path.basename(config['css'])) +# logger.debug("copied main.css") +# except Exception as e: +# logger.warning("could not copy main.css - " + str(e)) + init_varfilter() + headercontent = open(template, 'r').read() + headercontent = varpattern.sub(lambda m: varfilter[re.escape(m.group(0))], headercontent) + return headercontent + +def orderbyfreq(l): # order a list by the frequency of its elements and remove duplicates + temp_l = l[:] + l = list(set(l)) + l = [[i, temp_l.count(i)] for i in l] # add count of each element + l.sort(key=lambda x:temp_l.count(x[0])) # sort by count + l = [i[0] + ' (' + str(i[1]) + ')' for i in l] # put element and count into string + l = l[::-1] # reverse + return l + +def addtag(l, tag): # add prefix and suffix tags to each item in a list + l2 = ['<' + tag + '>' + i + '</' + tag + '>' for i in l] + return l2 + +def truncl(input, limit): # truncate list + if (len(input) > limit): + more = str(len(input) - limit) + output = input[:limit] + output.append("+ " + more + " more")
+ return(output) + else: + return(input) + +def plural(noun, quantity): # return "1 noun" or "n nouns" + if (quantity == 1): + return(str(quantity) + " " + noun) + else: + return(str(quantity) + " " + noun + "s") + +def parsesize(num, suffix='B'): # return human-readable size from number of bytes + for unit in ['','Ki','Mi','Gi','Ti','Pi','Ei','Zi']: + if abs(num) < 1024.0: + return "%3.1f %s%s" % (num, unit, suffix) + num /= 1024.0 + return "%.1f%s%s" % (num, 'Yi', suffix) + +def fsubject(template): # Replace variables in the title template provided in config + r = varpattern.sub(lambda m: varfilter[re.escape(m.group(0))], template) + logger.debug("Returning subject line " + r) + return r + +def writedata(subtitle, data = None): # write title and data + if (subtitle == ""): + logger.warning("No subtitle provided.. skipping section") + return + + if (data == None or len(data) == 0): + logger.debug("No data provided.. just printing subtitle") + return tag('p', 0, subtitle) + else: + logger.debug("Received data " + str(data)) + subtitle += ':' + if (len(data) == 1): + return tag('p', 0, subtitle + ' ' + data[0]) + else: + output = "" + output += tag('p', 0, subtitle) + output += opentag('ul', 1) + for datum in data: + output += tag('li', 0, datum) + output += closetag('ul', 1) + return output + diff --git a/logparse/interface.py b/logparse/interface.py new file mode 100644 index 0000000..5913b6b --- /dev/null +++ b/logparse/interface.py @@ -0,0 +1,115 @@ +# +# __main__.py +# +# This module is the entrypoint of the `logparse` shell command and also +# contains single-use functions which don't fit elsewhere. +# + +import logging, logging.handlers +import argparse +import os +import glob +import sys +from datetime import datetime + +import logparse +from . 
import config +from logparse import formatting, mail +from .parsers import load_parsers, sudo, sshd, cron, httpd, smbd, postfix, zfs, temperature + +def rotate(): + # rotate logs using systemd logrotate + if parser.parse_args().function is None: + if (config.prefs['rotate'] == 'y'): + subprocess.call("/usr/sbin/logrotate -f /etc/logrotate.conf", shell=True) + logger.info("rotated logfiles") + else: + logger.debug("user doesn't want to rotate logs") + if (config.prefs['rotate'] == 's'): + logger.debug("Here is the output of `logrotate -d /etc/logrotate.conf` (simulated):") + sim = subprocess.check_output("/usr/sbin/logrotate -d /etc/logrotate.conf", shell=True) + logger.debug(sim) + + + +def main(): + # Get arguments + parser = argparse.ArgumentParser(description='grab logs of some common services and send them by email') + parser.add_argument('-t','--to', help='mail recipient (\"to\" address)', required=False) + parser.add_argument('-c', '--config', help='path to config file', required=False) + parser.add_argument('-p', '--print', help='print HTML to stdout', required=False, dest='printout', action='store_true', default=False) + parser.add_argument('-d', '--destination', help='file to output HTML', required=False) + parser.add_argument('-f', '--overwrite', help='force overwrite an existing output file', required=False, action='store_true', default=False) + parser.add_argument('-v', '--verbose', help='verbose console/syslog output (for debugging)', required=False, default=False, action='store_true') + parser.add_argument('-r', '--rotate', help='force rotate log files using systemd logrotate', required=False, default=False, action='store_true') + parser.add_argument('-nr', '--no-rotate', help='do not rotate logfiles (overrides logparse.conf)', required=False, default=False, action='store_true') + parser.add_argument('-l', '--logs', help='services to analyse', required=False) + + # Set up logging + logger = logging.getLogger(__name__) + loghandler = logging.handlers.SysLogHandler(address = '/dev/log') + loghandler.setFormatter(logging.Formatter(fmt='logparse.py[' + str(os.getpid()) + ']: %(message)s')) + loghandler.setLevel(logging.WARNING) # don't spam syslog with debug messages + if parser.parse_args().verbose: + print("Verbose mode is on") + logging.basicConfig(level=logging.DEBUG) + logger.debug("Verbose mode turned on") + else: + logging.basicConfig(level=logging.INFO) + logger.addHandler(loghandler) + + # Load config + if parser.parse_args().config or config.prefs['verbose']: + config.prefs = config.loadconf(parser.parse_args().config, parser) + else: + config.prefs = config.loadconf(argparser=parser) + logger.debug("Finished loading config") + + # Time analysis + global start + start = datetime.now() + logger.info("Beginning log analysis at {0} {1}".format(start.strftime(formatting.DATEFMT), start.strftime(formatting.TIMEFMT))) + logger.debug("This is {0} version {1}, running on Python {2}".format(logparse.__name__, logparse.__version__, sys.version)) + +# for l in parser.parse_args().logs.split(' '): +# eval(l) +# sys.exit() + +# print(load_parsers.search()); + # Write HTML document + global output_html + output_html = formatting.header(config.prefs['header']) + output_html += sudo.parse_log() + output_html += sshd.parse_log() + output_html += cron.parse_log() + output_html += httpd.parse_log() + output_html += smbd.parse_log() + output_html += postfix.parse_log() + output_html += zfs.parse_log() + output_html += temperature.parse_log() + output_html += formatting.closetag('body') + 
formatting.closetag('html') + if parser.parse_args().printout: + print(output_html) + if parser.parse_args().destination: + logger.debug("Outputting to {0}".format(parser.parse_args().destination)) + if not os.path.isfile(parser.parse_args().destination) or parser.parse_args().overwrite: + with open(parser.parse_args().destination, 'w') as f: + f.write(output_html) + logger.info("Written output to {}".format(parser.parse_args().destination)) + else: + logger.warning("Destination file already exists") + if input("Would you like to overwrite {0}? (y/n) [n] ".format(parser.parse_args().destination)) == 'y': + with open(parser.parse_args().destination, 'w') as f: + f.write(output_html) + logger.debug("Written output to {}".format(parser.parse_args().destination)) + else: + logger.warning("No output written") + + if parser.parse_args().to: + mail.sendmail(mailbin=config.prefs['mail']['mailbin'], body=output_html, recipient=parser.parse_args().to, subject="logparse test") + + # Print end message + finish = datetime.now() + logger.info("Finished parsing logs at {0} {1} (total time: {2})".format(finish.strftime(formatting.DATEFMT), finish.strftime(formatting.TIMEFMT), finish - start)) + + return diff --git a/logparse/logio.py b/logparse/logio.py new file mode 100644 index 0000000..e69de29 diff --git a/logparse/mail.py b/logparse/mail.py new file mode 100644 index 0000000..9e840a4 --- /dev/null +++ b/logparse/mail.py @@ -0,0 +1,49 @@ +# +# mail.py +# +# This module is essentially a wrapper for Python's premailer and whatever +# the default mail handler is. +# + +import logging +logger = logging.getLogger(__name__) + +from os.path import isfile +import premailer +from email.mime.text import MIMEText +import subprocess + +def mailprep(htmlin, stylesheet): + logger.debug("Converting stylesheet " + stylesheet + " to inline tags") + if not isfile(stylesheet): + logger.warning("Cannot read stylesheet {}: file does not exist".format(stylesheet)) + raise FileNotFoundError + pm = premailer.Premailer(htmlin, external_styles=stylesheet) + htmlout = pm.transform() + logger.info("Converted stylesheet to inline tags") + return htmlout + + +def sendmail(mailbin, body, recipient, subject, *sender): + logger.debug("Sending email") + msg = MIMEText(body) + if sender: + msg["From"] = sender[0] + msg["To"] = recipient + msg["Subject"] = subject + mailproc = subprocess.Popen([mailbin, '--debug-level=10', '-t'], stdin=subprocess.PIPE, stdout=subprocess.PIPE) + logger.debug("Compiled message and opened process") + try: + stdout = mailproc.communicate(msg.as_bytes(), timeout=15) + logger.debug("sendmail output: {}".format(stdout)) + logger.info("Sent email") + return 0 +# except TimeoutExpired: +# mailproc.kill() +# stdout = mailproc.communicate() +# logger.debug("Timeout expired: {}".format(stdout)) +# raise subprocess.TimeoutError + except Exception as e: + mailproc.kill() + logger.warning("Failed to send message: {0}".format(str(e))) +# raise ChildProcessError diff --git a/logparse/parsers/__init__.py b/logparse/parsers/__init__.py new file mode 100644 index 0000000..5aa0845 --- /dev/null +++ b/logparse/parsers/__init__.py @@ -0,0 +1 @@ +__all__ = ["load_parsers", "sudo", "sshd"] diff --git a/logparse/parsers/cron.py b/logparse/parsers/cron.py new file mode 100644 index 0000000..2c7289b --- /dev/null +++ b/logparse/parsers/cron.py @@ -0,0 +1,38 @@ +# +# cron.py +# +# List the logged (executed) cron jobs and their commands +# TODO: also output a list of scheduled (future) jobs + +import re + +from 
..formatting import * +from ..util import readlog, resolve +from .. import config + +import logging +logger = logging.getLogger(__name__) + +def parse_log(): + output = '' + logger.debug("Starting cron section") + output += opentag('div', 1, 'cron', 'section') + matches = re.findall('.*CMD\s*\(\s*(?!.*cd)(.*)\)', readlog(config.prefs['logs']['cron'])) + num = sum(1 for line in matches) + commands = [] + for match in matches: + commands.append(str(match)) + # commands.append([str(match)for match in matches]) + #logger.debug("found cron command " + str(commands)) + logger.info("Found " + str(num) + " cron jobs") + subtitle = str(num) + " cron jobs run" + output += writetitle("cron") + output += writedata(subtitle) + if (len(matches) > 0): + commands = addtag(commands, 'code') + commands = orderbyfreq(commands) + commands = truncl(commands, config.prefs['maxcmd']) + output += writedata("top cron commands", [c for c in commands]) + output += closetag('div', 1) + logger.info("Finished cron section") + return output diff --git a/logparse/parsers/httpd.py b/logparse/parsers/httpd.py new file mode 100644 index 0000000..243af06 --- /dev/null +++ b/logparse/parsers/httpd.py @@ -0,0 +1,80 @@ +# +# httpd.py +# +# Analyse Apache (httpd) server logs, including data transferred, requests, +# clients, and errors. Note that Apache's logs can get filled up very quickly +# with the default verbosity, leading to logparse taking a very long time to +# analyse them. In general the default verbosity is good, but logs should be +# cleared as soon as they are analysed (make sure 'rotate' is set to 'y'). +# + +import re +import traceback + +from ..formatting import * +from ..util import readlog, resolve +from .. import config + +import logging +logger = logging.getLogger(__name__) + +def parse_log(): + output = '' + logger.debug("Starting httpd section") + output += opentag('div', 1, 'httpd', 'section') + accesslog = readlog(config.prefs['logs']['httpd'] + '/access.log') + a = len(accesslog.split('\n')) + errorlog = readlog(config.prefs['logs']['httpd'] + '/error.log') + e = len(errorlog.split('\n')) + data_b = 0 + ips = [] + files = [] + useragents = [] + errors = [] + notfound = [] + unprivileged = [] + + logger.debug("Searching through access log") + for line in accesslog.split('\n'): + fields = re.search('^(\S*) .*GET (\/.*) HTTP/\d\.\d\" 200 (\d*) \"(.*)\".*\((.*)\;', line) + try: + ips.append(resolve(fields.group(1), fqdn=config.prefs['httpd']['resolve-domains'])) + files.append(fields.group(2)) + useragents.append(fields.group(5)) + data_b += int(fields.group(3)) + except Exception as error: + if type(error) is AttributeError: # this line is not an access log + pass + else: + logger.warning("Error processing httpd access log: " + str(error)) + traceback.print_exc() + data_h = parsesize(data_b) + output += writetitle("apache") + + logger.info("httpd has transferred " + str(data_b) + " bytes in response to " + str(a) + " requests with " + str(e) + " errors") + if (a > 0): + logger.debug("Parsing request statistics (this might take a while)") + files = addtag(files, 'code') + files = orderbyfreq(files) + files = truncl(files, config.prefs['maxlist']) + output += writedata(plural(" request", a), files) + if (ips != None): + logger.debug("Parsing client statistics") + ips = addtag(ips, 'code') + ips = orderbyfreq(ips) + n_ip = str(len(ips)) + ips = truncl(ips, config.prefs['maxlist']) + output += writedata(plural(" client", n_ip), ips) + if (useragents != None): + logger.debug("Parsing user agent statistics") + useragents = addtag(useragents, 
'code') + useragents = orderbyfreq(useragents) + n_ua = str(len(useragents)) + useragents = truncl(useragents, config.prefs['maxlist']) + output += writedata(plural(" device", n_ua), useragents) + + output += writedata(data_h + " transferred") + output += writedata(plural(" error", e)) + + output += closetag('div', 1) + logger.info("Finished httpd section") + return output diff --git a/logparse/parsers/load_parsers.py b/logparse/parsers/load_parsers.py new file mode 100644 index 0000000..fcb449e --- /dev/null +++ b/logparse/parsers/load_parsers.py @@ -0,0 +1,32 @@ +# +# load_parsers.py +# +# Search for and load files which parse logs for particular services +# + +import imp +import os +import glob + +parser_dir = "/usr/share/logparse/" +main_module = "__init__" + +import logging +logger = logging.getLogger(__name__) + +def search(): + logger.debug("Searching for parsers in {0}".format(parser_dir)) + parsers = [] + parser_candidates = glob.glob(os.path.join(os.path.dirname(parser_dir), "*.py")) + for p in parser_candidates: + location = os.path.join(parser_dir, p) + if not os.path.isdir(location) or not main_module + '.py' in os.listdir(location): + continue + info = imp.find_module(main_module, [location]) + parsers.append({"name": p, "info": info}) + return parsers + +def load(parser): + logger.debug("Loading {0}".format(parser["name"])) + return imp.load_module(parser["name"], *parser["info"]) + diff --git a/logparse/parsers/postfix.py b/logparse/parsers/postfix.py new file mode 100644 index 0000000..90190f3 --- /dev/null +++ b/logparse/parsers/postfix.py @@ -0,0 +1,50 @@ +# +# postfix.py +# +# Get message statistics from postfix/sendmail logs +# + +import re + +from ..formatting import * +from ..util import readlog, resolve +from .. import config + +import logging +logger = logging.getLogger(__name__) + +def parse_log(): + output = '' + logger.debug("Starting postfix section") + output += opentag('div', 1, 'postfix', 'section') + output += writetitle("postfix") + logger.debug("Searching through postfix logs") + messages = re.findall('.*from\=<(.*)>, size\=(\d*),.*\n.*to=<(.*)>', readlog(config.prefs['logs']['postfix'])) + r = [] + s = [] + size = 0 + logger.debug("Analysing message size") + for message in messages: + r.append(message[2]) + s.append(message[0]) + size += int(message[1]) + # size = sum([int(x) for x in messages]) + size = parsesize(size) + n = str(len(messages)) + + logger.debug("Analysing message recipients") + if (len(r) > 0): + s = list(set(r)) # unique recipients + if (len(s) > 1): + r = orderbyfreq(r) + r = truncl(r, config.prefs['maxlist']) + output += writedata(n + " messages sent to", r) + else: + output += writedata(n + " messages sent to " + r[0]) + else: + output += writedata(n + " messages sent") + logger.info("Found {0} messages sent to {1} recipients".format(n, str(len(r)))) + output += writedata("total of " + size) + output += closetag('div', 1) + logger.info("Finished postfix section") + return output diff --git a/logparse/parsers/smbd.py b/logparse/parsers/smbd.py new file mode 100644 index 0000000..1bf06bd --- /dev/null +++ b/logparse/parsers/smbd.py @@ -0,0 +1,79 @@ +# +# smbd.py +# +# Get login statistics for a samba server. +# TODO: add feature to specify shares to check in config file +# + +import re +import glob + +from ..formatting import * +from ..util import readlog, resolve +from .. 
import config + +import logging +logger = logging.getLogger(__name__) + +def parse_log(): + output = '' + logger.debug("Starting smbd section") + output += opentag('div', 1, 'smbd', 'section') + files = glob.glob(config.prefs['logs']['smb'] + "/log.*[!\.gz][!\.old]") # find list of logfiles + # for f in files: + + # file_mod_time = os.stat(f).st_mtime + + # Time in seconds since epoch for time, in which logfile can be unmodified. + # should_time = time.time() - (30 * 60) + + # Time in minutes since last modification of file + # last_time = (time.time() - file_mod_time) + # logger.debug(last_time) + + # if (file_mod_time - should_time) < args.time: + # print "CRITICAL: {} last modified {:.2f} minutes. Threshold set to 30 minutes".format(last_time, file, last_time) + # else: + + # if (datetime.timedelta(datetime.datetime.now() - datetime.fromtimestamp(os.path.getmtime(f))).days > 7): + # files.remove(f) + logger.debug("Found log files " + str(files)) + n_auths = 0 # total number of logins from all users + sigma_auths = [] # contains users + + for file in files: # one log file for each client + + logger.debug("Looking at file " + file) + + # find the machine (ip or hostname) that this file represents + ip = re.search('log\.(.*)', file).group(1) # get ip or hostname from file path (/var/log/samba/log.host) + host = resolve(ip, fqdn=config.prefs['smbd']['resolve-domains']) + if (host == ip and (config.prefs['smbd']['resolve-domains'] or config.prefs['resolve-domains']) != 'ip'): # if ip has disappeared, fall back to a hostname from logfile + newhost = re.findall('.*\]\@\[(.*)\]', readlog(file)) + if (len(set(newhost)) == 1): # all hosts in one file should be the same + host = newhost[0].lower() + + # count number of logins from each user-host pair + matches = re.findall('.*(?:authentication for user \[|connect to service .* initially as user )(\S*)(?:\] .*succeeded| \()', readlog(file)) + for match in matches: + userhost = match + "@" + host + sigma_auths.append(userhost) + # exists = [i for i, item in enumerate(sigma_auths) if re.search(userhost, item[0])] + # if (exists == []): + # sigma_auths.append([userhost, 1]) + # else: + # sigma_auths[exists[0]][1] += 1 + n_auths += 1 + output += writetitle("samba") + subtitle = plural("login", n_auths) + " from" + if (len(sigma_auths) == 1): # if only one user, do not display no of logins for this user + subtitle += ' ' + sigma_auths[0][0] + output += writedata(subtitle) + else: # multiple users + sigma_auths = orderbyfreq(sigma_auths) + sigma_auths = truncl(sigma_auths, config.prefs['maxlist']) + logger.debug("Found {0} samba logins".format(str(n_auths))) + output += writedata(subtitle, sigma_auths) + output += closetag('div', 1) + logger.info("Finished smbd section") + return output diff --git a/logparse/parsers/sshd.py b/logparse/parsers/sshd.py new file mode 100644 index 0000000..524312e --- /dev/null +++ b/logparse/parsers/sshd.py @@ -0,0 +1,57 @@ +# +# sshd.py +# +# Find number of ssh logins and authorised users +# + +import re + +from ..formatting import * +from ..util import readlog, resolve +from .. 
import config + +import logging +logger = logging.getLogger(__name__) + +def parse_log(): + output = '' + logger.debug("Starting sshd section") + output += opentag('div', 1, 'sshd', 'section') + logger.debug("Searching for matches in {0}".format(config.prefs['logs']['auth'])) + matches = re.findall('.*sshd.*Accepted publickey for .* from .*', readlog(config.prefs['logs']['auth'])) # get all logins + logger.debug("Finished searching for logins") + + users = [] # list of users with format [username, number of logins] for each item + data = [] + num = sum(1 for x in matches) # total number of logins + for match in matches: + entry = re.search('^.*publickey\sfor\s(\w*)\sfrom\s(\S*)', match) # [('user', 'ip')] + + user = entry.group(1) + ip = entry.group(2) + + userhost = user + '@' + resolve(ip, fqdn=config.prefs['sshd']['resolve-domains']) + exists = [i for i, item in enumerate(users) if re.search(userhost, item[0])] + if (exists == []): + users.append([userhost, 1]) + else: + users[exists[0]][1] += 1 + logger.debug("Parsed list of authorised users") + + output += writetitle('sshd') + subtitle = plural('login', num) + ' from' + if (len(users) == 1): # if only one user, do not display no of logins for this user + logger.debug("found " + str(len(matches)) + " ssh logins for user " + users[0][0]) + subtitle += ' ' + users[0][0] + output += writedata(subtitle) + else: + for user in users: + data.append(user[0] + ' (' + str(user[1]) + ')') + if len(data) > config.prefs['maxlist']: # if there are lots of users, truncate them + data.append('+ ' + str(len(users) - config.prefs['maxlist'] - 1) + " more") + break + logger.debug("found " + str(len(matches)) + " ssh logins for users " + str(data)) + output += writedata(subtitle, data) + output += closetag('div', 1) + logger.info("Finished sshd section") + return output diff --git a/logparse/parsers/sudo.py b/logparse/parsers/sudo.py new file mode 100644 index 0000000..ef74ec2 --- /dev/null +++ b/logparse/parsers/sudo.py @@ -0,0 +1,56 @@ +# +# sudo.py +# +# Get number of sudo sessions for each user +# + +import re + +from ..formatting import * +from ..util import readlog, resolve +from .. 
import config + +import logging +logger = logging.getLogger(__name__) + +def parse_log(): + output = '' + logger.debug("Starting sudo section") + output += opentag('div', 1, 'sudo', 'section') + logger.debug("Searching for matches in {0}".format(config.prefs['logs']['auth'])) + umatches = re.findall('.*sudo:session\): session opened.*', readlog(config.prefs['logs']['auth'])) + num = sum(1 for line in umatches) # total number of sessions + users = [] + data = [] + for match in umatches: + user = re.search('.*session opened for user root by (\S*)\(uid=.*\)', match).group(1) + exists = [i for i, item in enumerate(users) if re.search(user, item[0])] + if (exists == []): + users.append([user, 1]) + else: + users[exists[0]][1] += 1 + commands = [] + cmatches = re.findall('sudo:.*COMMAND\=(.*)', readlog(config.prefs['logs']['auth'])) + for cmd in cmatches: + commands.append(cmd) + logger.debug("Finished parsing sudo sessions") + + output += writetitle("sudo") + subtitle = plural("sudo session", num) + " for" + if (len(users) == 1): + logger.debug("found " + str(num) + " sudo session(s) for user " + str(users[0])) + subtitle += ' ' + users[0][0] + output += writedata(subtitle) + else: + for user in users: + data.append(user[0] + ' (' + str(user[1]) + ')') + logger.debug("found " + str(num) + " sudo sessions for users " + str(data)) + output += writedata(subtitle, data) + if (len(commands) > 0): + commands = addtag(commands, 'code') + commands = orderbyfreq(commands) + commands = truncl(commands, config.prefs['maxcmd']) + output += writedata("top sudo commands", [c for c in commands]) + output += closetag('div', 1) + return output + logger.info("Finished sudo section") diff --git a/logparse/parsers/temperature.py b/logparse/parsers/temperature.py new file mode 100644 index 0000000..35d88e8 --- /dev/null +++ b/logparse/parsers/temperature.py @@ -0,0 +1,133 @@ +# +# temperature.py +# +# Find current temperature of various system components (CPU, motherboard, +# hard drives, ambient). Detection of motherboard-based temperatures (CPU +# etc) uses the pysensors library, and produces a similar output to +# lmsensors. HDD temperatures are obtained from the hddtemp daemon +# (http://www.guzu.net/linux/hddtemp.php) which was orphaned since 2007. For +# hddtemp to work, it must be started in daemon mode, either manually or with +# a unit file. Manually, it would be started like this: +# +# sudo hddtemp -d /dev/sda /dev/sdb ... /dev/sdX +# + +import re +import sensors +import socket, sys +from telnetlib import Telnet +from typing import List, Dict, NamedTuple + +from ..formatting import * +from ..util import readlog, resolve +from .. 
diff --git a/logparse/parsers/temperature.py b/logparse/parsers/temperature.py
new file mode 100644
index 0000000..35d88e8
--- /dev/null
+++ b/logparse/parsers/temperature.py
@@ -0,0 +1,133 @@
+#
+# temperature.py
+#
+# Find current temperature of various system components (CPU, motherboard,
+# hard drives, ambient). Detection of motherboard-based temperatures (CPU
+# etc.) uses the pysensors library, and produces output similar to
+# lm-sensors. HDD temperatures are obtained from the hddtemp daemon
+# (http://www.guzu.net/linux/hddtemp.php), which has been orphaned since
+# 2007. For hddtemp to work, it must be started in daemon mode, either
+# manually or with a unit file. Manually, it would be started like this:
+#
+#   sudo hddtemp -d /dev/sda /dev/sdb ... /dev/sdX
+#
+
+import re
+import sensors
+import socket, sys
+from telnetlib import Telnet
+from typing import List, Dict, NamedTuple
+
+from ..formatting import *
+from ..util import readlog, resolve
+from .. import config
+
+import logging
+logger = logging.getLogger(__name__)
+
+class Drive(NamedTuple):
+    path: str
+    model: str
+    temperature: int
+    units: str
+
+class HddtempClient:
+
+    def __init__(self, host: str='localhost', port: int=7634, timeout: int=10, sep: str='|') -> None:
+        self.host = host
+        self.port = port
+        self.timeout = timeout
+        self.sep = sep
+
+    def _parse_drive(self, drive: str) -> Drive:
+        drive_data = drive.split(self.sep)
+        return Drive(drive_data[0], drive_data[1], int(drive_data[2]), drive_data[3])
+
+    def _parse(self, data: str) -> List[Drive]:
+        line = data.strip(self.sep)     # remove leading/trailing separators
+        drives = line.split(self.sep * 2)   # records are delimited by a double separator
+        return [self._parse_drive(drive) for drive in drives]
+
+    def get_drives(self) -> List[Drive]:    # obtain data from telnet server
+        try:
+            with Telnet(self.host, self.port, timeout=self.timeout) as tn:
+                data = tn.read_all()
+            return self._parse(data.decode('ascii'))    # return parsed data
+        except (OSError, socket.error) as e:
+            logger.warning("Couldn't read data from {0}:{1}: {2}".format(self.host, self.port, str(e)))
+            return []   # empty list so callers can still iterate over the result
+
+
+def parse_log():
+    logger.debug("Starting temp section")
+    output = writetitle("temperatures")
+    output += opentag('div', 1, 'temp', 'section')
+
+    # cpu temp
+
+    sensors.init()
+    coretemps = []
+    pkgtemp = 0
+    systemp = 0
+    try:
+        for chip in sensors.iter_detected_chips():
+            for feature in chip:
+                if "Core" in feature.label:
+                    coretemps.append([feature.label, feature.get_value()])
+                    logger.debug("found core " + feature.label + " at temp " + str(feature.get_value()))
+                if "CPUTIN" in feature.label:
+                    pkgtemp = str(feature.get_value())
+                    logger.debug("found cpu package at temperature " + pkgtemp)
+                if "SYS" in feature.label:
+                    systemp = feature.get_value()
+                    logger.debug("found sys input " + feature.label + " at temp " + str(feature.get_value()))
+        logger.debug("Core temp data is: " + str(coretemps))
+        if coretemps:   # avoid dividing by zero when no cores are detected
+            core_avg = sum(core[1] for core in coretemps) / len(coretemps)
+            logger.debug("average cpu temp is " + str(core_avg))
+            coretemps.append(["avg", str(core_avg)])
+        coretemps.append(["pkg", pkgtemp])
+        coretemps = [x[0] + ": " + str(x[1]) + DEG + CEL for x in coretemps]
+    finally:
+        sensors.cleanup()
+
+    if systemp != 0:
+        output += writedata("sys: " + str(systemp) + DEG)
+    if coretemps:
+        output += writedata("cores", coretemps)
+
+    logger.info("Finished reading onboard temperatures")
+
+    # drive temp
+
+    # For this to work, `hddtemp` must be running in daemon mode.
+    # Start it like this (bash): sudo hddtemp -d /dev/sda /dev/sdX...
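+    # If the daemon is up, it can be checked by hand (assuming the default
+    # port). The reply is one record per drive in the separator-delimited
+    # format parsed by HddtempClient above; the model and temperature in
+    # this transcript are illustrative:
+    #
+    #   $ telnet localhost 7634
+    #   |/dev/sda|ST4000DM000-1F2168|34|C|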
+
+    sumtemp = 0.0
+    fields = []
+
+    client = HddtempClient(host=config.prefs['hddtemp']['host'], port=int(config.prefs['hddtemp']['port']), sep=config.prefs['hddtemp']['separator'], timeout=int(config.prefs['hddtemp']['timeout']))
+    drives = []     # drives selected in the config
+    for drive in sorted(client.get_drives(), key=lambda x: x.path):
+        if drive.path in config.prefs['hddtemp']['drives']:
+            drives.append(drive)
+            sumtemp += drive.temperature
+            fields.append(("{0} ({1})".format(drive.path, drive.model) if config.prefs['hddtemp']['show-model'] else drive.path) + ": {0}{1}{2}".format(drive.temperature, DEG, drive.units))
+        else:
+            logger.debug("Ignoring drive {0} ({1}) due to config".format(drive.path, drive.model))
+    logger.debug("Received drive info: " + str(drives))
+
+    if drives:  # avoid dividing by zero when no drives are returned
+        hddavg = '{0:.1f}{1}{2}'.format(sumtemp / len(drives), DEG, drives[0].units)    # use units of first drive
+        logger.debug("Sum of temperatures: {0}; number of drives: {1}; average disk temp: {2}".format(sumtemp, len(drives), hddavg))
+        fields.append("avg: " + hddavg)
+        output += writedata("disks", fields)
+    logger.info("Finished processing drive temperatures")
+
+    output += closetag('div', 1)
+    logger.info("Finished temp section")
+    return output
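The client above can also be exercised outside of logparse; a minimal sketch, assuming the package is installed and the hddtemp daemon is listening on the default port:

    from logparse.parsers.temperature import HddtempClient

    client = HddtempClient(host='localhost', port=7634)
    for drive in client.get_drives():   # empty list if the daemon is unreachable
        print("{0} ({1}): {2} {3}".format(drive.path, drive.model, drive.temperature, drive.units))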
diff --git a/logparse/parsers/zfs.py b/logparse/parsers/zfs.py
new file mode 100644
index 0000000..09db33e
--- /dev/null
+++ b/logparse/parsers/zfs.py
@@ -0,0 +1,58 @@
+#
+# zfs.py
+#
+# Look through ZFS logs to find the latest scrub and its output.
+# Note that ZFS doesn't normally produce logs in /var/log, so for this to
+# work, we must set up a cron job to dump `zpool iostat` into a file (hourly
+# is best):
+#
+#   zpool iostat > /var/log/zpool.log && zpool status >> /var/log/zpool.log
+#
+# The file gets overwritten every hour, so if more than one scrub occurs
+# between logparse runs, only the latest one is reported.
+#
+# TODO: add feature to specify pools to check in config file
+# TODO: set critical value for scrub data repair
+#
+
+import re
+import sys, traceback
+
+from ..formatting import *
+from ..util import readlog, resolve
+from .. import config
+
+import logging
+logger = logging.getLogger(__name__)
+
+def parse_log():
+    output = ''
+    logger.debug("Starting zfs section")
+    output += opentag('div', 1, 'zfs', 'section')
+    zfslog = readlog(config.prefs['logs']['zfs'])
+    logger.debug("Analysing zpool log")
+    pool = re.search('.*---\n(\w*)', zfslog).group(1)
+    scrub = re.search('.* scrub repaired (\d+\s*\w+) in .* with (\d+) errors on (\w+)\s+(\w+)\s+(\d+)\s+(\d{1,2}:\d{2}):\d+\s+(\d{4})', zfslog)
+    iostat = re.search('.*---\n\w*\s*(\S*)\s*(\S*)\s', zfslog)
+    scrubrepairs = scruberrors = scrubdate = None
+    try:
+        logger.debug("Found groups {0}".format(scrub.groups()))     # raises AttributeError if no scrub was logged
+        scrubrepairs = scrub.group(1)
+        scruberrors = scrub.group(2)
+        scrubdate = ' '.join(scrub.groups()[2:-1])
+    except Exception as e:
+        logger.debug("Error getting scrub data: " + str(e))
+        traceback.print_exc(limit=2, file=sys.stdout)
+    alloc = iostat.group(1)
+    free = iostat.group(2)
+    output += writetitle("zfs")
+    if scrubdate is not None:
+        subtitle = "Scrub of " + pool + " on " + scrubdate
+        data = [scrubrepairs + " repaired", scruberrors + " errors", alloc + " used", free + " free"]
+    else:
+        subtitle = pool
+        data = [alloc + " used", free + " free"]
+    output += writedata(subtitle, data)
+    output += closetag('div', 1)
+    logger.info("Finished zfs section")
+    return output
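The scrub regex can be sanity-checked against a typical `zpool status` scan line; the pool figures and date in this sample are made up:

    import re

    line = "  scan: scrub repaired 0B in 0 days 03:41:55 with 0 errors on Sun Jun  2 03:42:56 2019"
    scrub = re.search('.* scrub repaired (\d+\s*\w+) in .* with (\d+) errors on '
                      '(\w+)\s+(\w+)\s+(\d+)\s+(\d{1,2}:\d{2}):\d+\s+(\d{4})', line)
    print(scrub.group(1), scrub.group(2))       # 0B 0
    print(' '.join(scrub.groups()[2:-1]))       # Sun Jun 2 03:42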
diff --git a/logparse/util.py b/logparse/util.py
new file mode 100644
index 0000000..1769033
--- /dev/null
+++ b/logparse/util.py
@@ -0,0 +1,82 @@
+#
+# util.py
+#
+# Commonly used general functions
+#
+
+import re
+import os
+import socket
+import inspect
+
+import logging
+logger = logging.getLogger(__name__)
+
+from .config import prefs
+from pkg_resources import Requirement, resource_filename
+
+def hostname(path):     # get the hostname of current server
+    with open(path, 'r') as hnfile:
+        hn = re.search('^(\w*)\n*', hnfile.read()).group(1)
+    return hn
+
+def getlocaldomain():   # get the parent fqdn of current server
+    # Note: if socket.getfqdn() returns localhost, make sure the first entry
+    # in /etc/hosts contains the fqdn
+    domain = socket.getfqdn().split('.', 1)
+    if len(domain) != 2:
+        logger.warning('Could not get domain of this server, only hostname. Please consider updating /etc/hosts')
+        return 'localdomain'
+    else:
+        return domain[-1]
+
+def resolve(ip, fqdn='host-only'):  # try to resolve an ip to hostname
+    # Possible values for fqdn:
+    #   fqdn            show full hostname and domain
+    #   fqdn-implicit   show hostname and domain unless local
+    #   host-only       only show hostname
+    #   ip              never resolve anything
+    # resolve-domains values defined in individual sections of the config
+    # take priority over the global config
+
+    if not fqdn:
+        fqdn = prefs['resolve-domains']
+
+    if fqdn == 'ip':
+        return ip
+
+    try:
+        socket.inet_aton(ip)    # succeeds if text contains ip
+        hn = socket.gethostbyaddr(ip)[0]    # resolve ip to hostname
+        if fqdn == 'fqdn-implicit' and '.' in hn and hn.split('.', 1)[1] == getlocaldomain():
+            return hn.split('.')[0]
+        elif fqdn == 'fqdn' or fqdn == 'fqdn-implicit':
+            return hn
+        elif fqdn == 'host-only':
+            return hn.split('.')[0]
+        else:
+            logger.warning("invalid value for fqdn config")
+            return hn
+    except socket.herror:
+        # cannot resolve ip
+        logger.debug(ip + " cannot be found, might not exist anymore")
+        return ip
+    except (OSError, socket.error):     # socket.error for Python 2 compatibility
+        # already a hostname
+        logger.debug(ip + " is already a hostname")
+        return ip
+    except Exception as err:
+        logger.warning("failed to resolve hostname for " + ip + ": " + str(err))
+        return ip   # return ip if no hostname exists
+
+def readlog(path=None, mode='r'):   # read a log file into a string
+    if path is None:
+        logger.error("no path provided")
+        return ''
+    if not os.path.isfile(path):
+        logger.error("Log at {0} was requested but does not exist".format(path))
+        return ''
+    try:
+        with open(path, mode) as logfile:
+            return logfile.read()
+    except (IOError, OSError) as e:     # was `except IOError or OSError`, which only catches IOError
+        logger.warning("Error reading log at {0}: {1}".format(path, e.strerror))
+        return ''
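For reference, the four resolve modes behave like this (the addresses and hostnames here are hypothetical):

    from logparse.util import resolve

    resolve('192.0.2.10', fqdn='ip')              # '192.0.2.10' (no lookup)
    resolve('192.0.2.10', fqdn='fqdn')            # e.g. 'web01.example.com'
    resolve('192.0.2.10', fqdn='host-only')       # e.g. 'web01'
    resolve('192.0.2.10', fqdn='fqdn-implicit')   # 'web01' if example.com is the local domain, else the full fqdn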
diff --git a/setup.py b/setup.py
new file mode 100644
index 0000000..b400997
--- /dev/null
+++ b/setup.py
@@ -0,0 +1,50 @@
+from setuptools import setup
+from os import path
+
+# Import main module so we can single-source the version from it
+import logparse
+
+here = path.abspath(path.dirname(__file__))
+
+# Get the long description from the README file
+with open(path.join(here, 'README.md'), encoding='utf-8') as f:
+    long_description = f.read()
+
+setup(
+    name='logparse',    # https://packaging.python.org/specifications/core-metadata/#name
+    version=logparse.__version__,   # https://www.python.org/dev/peps/pep-0440/
+    description='Summarise server logs',
+    long_description=long_description,
+    long_description_content_type='text/markdown',
+    url='https://git.lorimer.id.au/logparse.git',
+    author='Andrew Lorimer',
+    author_email='andrew@lorimer.id.au',
+    classifiers=[   # https://pypi.org/classifiers/
+        'Development Status :: 4 - Beta',
+        'Intended Audience :: System Administrators',
+        'Topic :: System :: Systems Administration',
+        'License :: OSI Approved :: MIT License',
+        'Programming Language :: Python :: 3',
+    ],
+    keywords='logparse log parse analysis summary monitor email server',
+    packages=['logparse', 'logparse.parsers'],
+    python_requires='>=3',  # https://packaging.python.org/guides/distributing-packages-using-setuptools/#python-requires
+    install_requires=['premailer', 'requests', 'pyyaml'],   # https://packaging.python.org/en/latest/requirements.html
+#    extras_require={'dev': ['check-manifest'], 'test': ['coverage']},  # additional dependencies; install with `pip install logparse[dev]`
+    data_files=[('/etc/logparse', ['logparse.conf', 'header.html', 'main.css'])],   # installed to /etc/logparse
+    project_urls={
+        'Readme': 'https://git.lorimer.id.au/logparse.git/about',
+        'Source': 'https://git.lorimer.id.au/logparse.git',
+        'Contact': 'mailto:bugs@lorimer.id.au',
+    },
+    entry_points={
+        'console_scripts': ['logparse=logparse.interface:main'],
+    },
+)
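With the console_scripts entry point above, running the installed `logparse` command is equivalent to the following sketch (logparse.interface.main is the function the entry point names, so it must exist for installation to work):

    from logparse.interface import main

    main()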