#! /usr/bin/python
# logparse: summarise common service logs into an html report and
# optionally mail it to a recipient supplied with -t/--to.

import argparse, logging, os, shutil, re, subprocess, sys, requests, glob, socket, sensors, datetime, time, operator, premailer
from sys import stdin
from collections import namedtuple, defaultdict

diskstat = namedtuple('diskstat', ['cap', 'alloc', 'free', 'ratio'])
drivetemp = namedtuple('drivetemp', ['name', 'temp', 'units'])

AUTHPATH = "/var/log/auth.log"
CRONPATH = "/var/log/cron.log"
SYSPATH = "/var/log/syslog"
SMBDDIR = "/var/log/samba"
ZFSPATH = "/var/log/zpool.log"
ALLOCPATH = "/tmp/alloc"
POSTFIXPATH = "/var/log/mail.log"
HTTPDSTATUS = "http://localhost/server-status"
HTTPDDIR = "/var/log/apache2"
HOSTNAMEPATH = "/etc/hostname"
DUPATHS = ["/home/andrew", "/mnt/andrew"]
HDDTEMPS = ["/dev/sda", "/dev/sdc", "/dev/sdd", "/dev/sde"]
HDDTEMPPORT = 7634
SUMMARYPATH = "/mnt/andrew/temp/logparse-test.html"
OUTPUTPATH = "/mnt/andrew/temp/logparse-test2.html"
MAILPATH = "/mnt/andrew/temp/log-parse-test-3.html"
HEADERPATH = os.path.dirname(os.path.realpath(__file__)) + "/header.html"
STYLEPATH = os.path.dirname(os.path.realpath(__file__)) + "/main.css"
MAILOUT = ""
HTMLOUT = ""
TXTOUT = ""
TITLE = "logparse"
MAXLIST = 10   # max number of list items shown before truncation
CMDNO = 3      # max number of "top commands" shown per section
MAILSUBJECT = "logparse from $hostname$"
VERSION = "v0.1"
# DEG = u'\N{DEGREE SIGN}'.encode('utf-8')
DEG = 'C'

# Set up logging
logging.basicConfig(level=logging.DEBUG)
logger = logging.getLogger('logparse')

# Get arguments
parser = argparse.ArgumentParser(description='grab logs of some common services and send them by email')
parser.add_argument('-t', '--to', help='mail recipient (\"to\" address)', required=False)
to = parser.parse_args().to


def __main__():
    """Entry point: write every section into the summary html file, then
    inline the stylesheet and (if a recipient was given) mail the result."""
    logger.info("Beginning log analysis at " + str(timenow))
    if (to == None):
        logger.info("no recipient address provided, outputting to stdout")
    else:
        logger.info("email will be sent to " + to)

    global tempfile
    tempfile = open(SUMMARYPATH, 'w+')
    tempfile.write(header(HEADERPATH))
    opentag('div', 1, 'main')
    sshd()
    sudo()
    cron()
    nameget()
    httpd()
    smbd()
    postfix()
    zfs()
    temp()
    du()
    for tag in ['div', 'body', 'html']:
        closetag(tag, 1)
    tempfile.close()
    mailprep(SUMMARYPATH, MAILPATH)
    if (to != None):
        logger.debug("sending email")
        ms = subject(MAILSUBJECT)
        cmd = "cat " + MAILPATH + " | mail --debug-level=10 -a 'Content-type: text/html' -s '" + ms + "' " + to
        logger.debug(cmd)
        # NOTE(review): shell=True with the unescaped, user-supplied `to`
        # address is shell-injectable; prefer subprocess with a list argv.
        subprocess.call(cmd, shell=True)
        logger.info("sent email")


def writetitle(title):
    """Write a section heading; rejects empty or multi-line titles."""
    if (title == '' or '\n' in title):
        logger.error("invalid title")
        return
    logger.debug("writing title for " + title)
    tag('h2', 0, title)


def writedata(subtitle, data=None):
    """Write a subtitle and, optionally, a list of data items to tempfile.

    A single-item list is folded into the subtitle line; longer lists are
    rendered as a <ul>."""
    if (subtitle == ""):
        # fix: was a `loggger` typo which raised NameError on this path
        logger.warning("no subtitle provided.. skipping section")
        return
    if (data == None):
        logger.debug("no data provided.. just printing subtitle")
        tag('p', 0, subtitle)
    else:
        logger.debug("received data " + str(data))
        subtitle += ':'
        if (len(data) == 1):
            tag('p', 0, subtitle + ' ' + data[0])
        else:
            tag('p', 0, subtitle)
            opentag('ul', 1)
            for datum in data:
                logger.debug("printing datum " + datum)
                tag('li', 0, datum)
            closetag('ul', 1)


def opentag(tag, block=0, id=None, cl=None):
    """Write an html opening tag, optionally with id/class attributes.
    block=1 surrounds the tag with newlines."""
    if (block == 1):
        tempfile.write('\n')
    tempfile.write('<' + tag)
    if (id != None):
        tempfile.write(" id='" + id + "'")
    if (cl != None):
        tempfile.write(" class='" + cl + "'")
    tempfile.write('>')
    if (block == 1):
        tempfile.write('\n')


def closetag(tag, block=0):
    """Write an html closing tag (fix: previously wrote nothing, leaving
    every element in the report unclosed)."""
    if (block == 0):
        tempfile.write("</" + tag + ">")
    else:
        tempfile.write("\n</" + tag + ">\n")


def tag(tag, block=0, content=""):
    """Write opening tag, content, and closing tag in one call."""
    opentag(tag, block)
    tempfile.write(content)
    closetag(tag, block)


def header(template):
    """Return the html header from `template` with $variables$ substituted."""
    templatefile = open(template, 'r')
    headercontent = templatefile.read()
    templatefile.close()
    headercontent = varpattern.sub(lambda m: varfilter[re.escape(m.group(0))], headercontent)
    return headercontent


def subject(template):
    """Return the mail subject line with $variables$ substituted."""
    r = varpattern.sub(lambda m: varfilter[re.escape(m.group(0))], template)
    logger.debug("returning subject line " + r)
    return r


def hostname():
    """Read the machine's hostname from HOSTNAMEPATH."""
    hnfile = open(HOSTNAMEPATH, 'r')
    hn = re.search('^(.*)\n*', hnfile.read()).group(1)
    hnfile.close()
    return hn


def resolve(ip):
    """Try to resolve an ip to a short hostname; return the ip on failure."""
    logger.debug("trying to resolve ip " + ip)
    try:
        socket.inet_aton(ip)  # succeeds iff the text is a valid ipv4 address
        hn = socket.gethostbyaddr(ip)[0].split(".")[0]  # resolve ip to hostname
        logger.debug("found hostname " + hn)
        return(hn)
    except Exception:
        logger.debug("failed to resolve hostname for " + ip)
        return(ip)  # return ip if no hostname exists


def plural(noun, quantity):
    """Return "1 noun" or "n nouns"."""
    if (quantity == 1):
        return(str(quantity) + " " + noun)
    else:
        return(str(quantity) + " " + noun + "s")


def parsesize(num, suffix='B'):
    """Return a human-readable size from a number of bytes."""
    for unit in ['', 'Ki', 'Mi', 'Gi', 'Ti', 'Pi', 'Ei', 'Zi']:
        if abs(num) < 1024.0:
            return "%3.1f %s%s" % (num, unit, suffix)
        num /= 1024.0
    return "%.1f%s%s" % (num, 'Yi', suffix)


def readlog(path=None, mode='r'):
    """Read a file, substituting known path aliases (e.g. 'auth', 'cron').
    Returns '' when the resolved path does not exist."""
    if (path == None):
        logger.error("no path provided")
        return
    path = pathpattern.sub(lambda m: pathfilter[re.escape(m.group(0))], path)
    if not os.path.isfile(path):
        logger.error(path + " does not exist")
        return ''
    return open(path, mode).read()


def writelog(path=None, content="", mode='w'):
    """Write content to a file, substituting known path aliases."""
    if (path == None or content == None):
        logger.error("invalid usage of writelog")
        return
    path = pathpattern.sub(lambda m: pathfilter[re.escape(m.group(0))], path)
    logfile = open(path, mode)
    logfile.write(content)
    logfile.close()


def getusage(path):
    """Get disk usage statistics for the filesystem containing `path`."""
    disk = os.statvfs(path)
    cap = float(disk.f_bsize * disk.f_blocks)                   # disk capacity
    alloc = float(disk.f_bsize * (disk.f_blocks - disk.f_bfree))  # allocated space
    free = float(disk.f_bsize * disk.f_bfree)                   # free space (blocks, not usable space)
    ratio = alloc / cap * 100                                   # percentage used
    return diskstat(cap, alloc, free, ratio)


def orderbyfreq(l):
    """Order a list by descending frequency of its elements, removing
    duplicates; each entry becomes "element (count)"."""
    temp_l = l[:]
    l = list(set(l))
    l = [[i, temp_l.count(i)] for i in l]   # count each unique element once
    l.sort(key=lambda x: x[1], reverse=True)  # most frequent first
    return [i[0] + ' (' + str(i[1]) + ')' for i in l]


def addtag(l, tag):
    """Wrap each item of l in <tag>...</tag> (fix: the closing tag was
    missing, yielding unbalanced html)."""
    return ['<' + tag + '>' + i + '</' + tag + '>' for i in l]


def truncl(items, limit):
    """Truncate a list to `limit` entries, appending a "+ n more" marker."""
    if (len(items) > limit):
        more = str(len(items) - limit)
        output = items[:limit]
        output.append("+ " + more + " more")
        return(output)
    else:
        return(items)


def mailprep(inputpath, outputpath, *stylesheet):
    """Inline the external stylesheet into the html and write the result
    to `outputpath` (mail clients ignore external css)."""
    logger.debug("converting stylesheet to inline tags")
    old = readlog(inputpath)
    pm = premailer.Premailer(old, external_styles=STYLEPATH)
    mailout = pm.transform()
    logger.info("converted stylesheet to inline tags")
    outfile = open(outputpath, 'w')
    outfile.write(mailout)
    outfile.close()
    logger.info("written to temporary mail file")


#
#
#

def sshd():
    """Summarise successful ssh publickey logins from the auth log."""
    logger.debug("starting sshd section")
    opentag('div', 1, 'sshd', 'section')
    matches = re.findall('.*sshd.*Accepted publickey for .* from .*', readlog('auth'))  # get all logins
    users = []  # [userhost, number of logins] for each user
    data = []
    num = len(matches)  # total number of logins
    for match in matches:
        entry = re.search('^.*publickey\sfor\s(\w*)\sfrom\s(\S*)', match)  # [('user', 'ip')]
        user = entry.group(1)
        ip = entry.group(2)
        userhost = user + '@' + resolve(ip)
        exists = [i for i, item in enumerate(users) if re.search(userhost, item[0])]
        if (exists == []):
            users.append([userhost, 1])
        else:
            users[exists[0]][1] += 1
    writetitle('sshd')
    subtitle = plural('login', num) + ' from'
    if (len(users) == 1):
        # single user: fold the name into the subtitle, skip the login count
        logger.debug("found " + str(len(matches)) + " ssh logins for user " + users[0][0])
        subtitle += ' ' + users[0][0]
        writedata(subtitle)
    else:
        for user in users:
            data.append(user[0] + ' (' + str(user[1]) + ')')
            if len(data) > MAXLIST:
                # lots of users: truncate (MAXLIST+1 shown, hence the -1)
                data.append('+ ' + str(len(users) - MAXLIST - 1) + " more")
                break
        logger.debug("found " + str(len(matches)) + " ssh logins for users " + str(data))
        writedata(subtitle, data)
    closetag('div', 1)
    logger.info("finished sshd section")


#
#
#

def sudo():
    """Summarise sudo sessions and the most frequent sudo commands."""
    logger.debug("starting sudo section")
    opentag('div', 1, 'sudo', 'section')
    umatches = re.findall('.*sudo:session\): session opened.*', readlog('auth'))
    num = len(umatches)  # total number of sessions
    users = []
    data = []
    for match in umatches:
        user = re.search('.*session opened for user root by (\S*)\(uid=.*\)', match).group(1)
        exists = [i for i, item in enumerate(users) if re.search(user, item[0])]
        if (exists == []):
            users.append([user, 1])
        else:
            users[exists[0]][1] += 1
    commands = []
    cmatches = re.findall('sudo:.*COMMAND\=(.*)', readlog('auth'))
    for cmd in cmatches:
        commands.append(cmd)
    logger.debug("found the following commands: " + str(commands))
    writetitle("sudo")
    subtitle = plural("sudo session", num) + " for"
    if (len(users) == 1):
        logger.debug("found " + str(num) + " sudo session(s) for user " + str(users[0]))
        subtitle += ' ' + users[0][0]
        writedata(subtitle)
    else:
        for user in users:
            data.append(user[0] + ' (' + str(user[1]) + ')')
        logger.debug("found " + str(num) + " sudo sessions for users " + str(data))
        writedata(subtitle, data)
    if (len(commands) > 0):
        commands = addtag(commands, 'code')
        commands = orderbyfreq(commands)
        commands = truncl(commands, CMDNO)
        writedata("top sudo commands", [c for c in commands])
    closetag('div', 1)
    logger.info("finished sudo section")


#
#
#

def cron():
    """Summarise cron jobs and the most frequent cron commands."""
    logger.debug("starting cron section")
    opentag('div', 1, 'cron', 'section')
    matches = re.findall('.*CMD\s*\(\s*(?!.*cd)(.*)\)', readlog('cron'))
    num = len(matches)
    commands = []
    for match in matches:
        commands.append(str(match))
    logger.debug("found cron command " + str(commands))
    logger.info("found " + str(num) + " cron jobs")
    subtitle = str(num) + " cron jobs run"
    writetitle("cron")
    writedata(subtitle)
    # fix: was `matches > 0`, comparing a list with an int
    if (len(matches) > 0):
        commands = addtag(commands, 'code')
        commands = orderbyfreq(commands)
        commands = truncl(commands, CMDNO)
        writedata("top cron commands", [c for c in commands])
    closetag('div', 1)
    logger.info("finished cron section")


#
#
#

def nameget():
    """Summarise successful and failed nameget downloads from syslog."""
    logger.debug("starting nameget section")
    opentag('div', 1, 'nameget', 'section')
    syslog = readlog('sys')
    failed = re.findall('.*nameget.*downloading of (.*) from .*failed.*', syslog)
    n_f = len(failed)
    l_f = []
    for i in failed:
        l_f.append(i)
    logger.debug("the following downloads failed: " + str(l_f))
    succ = re.findall('.*nameget.*downloaded\s(.*)', syslog)
    n_s = len(succ)
    l_s = []
    for i in succ:
        l_s.append(i)
    # fix: was logging l_f (the failures) here
    logger.debug("the following downloads succeeded: " + str(l_s))
    logger.debug("found " + str(n_s) + " successful downloads, and " + str(n_f) + " failed attempts")
    writetitle("nameget")
    writedata(str(n_s) + " succeeded", truncl(orderbyfreq(l_s), CMDNO))
    writedata(str(n_f) + " failed", truncl(orderbyfreq(l_f), CMDNO))
    closetag('div', 1)
    logger.info("finished nameget section")


#
#
#

def httpd():
    """Summarise apache requests, clients, user agents and traffic."""
    logger.info("starting httpd section")
    opentag('div', 1, 'httpd', 'section')
    accesslog = readlog("httpd/access.log")
    a = len(accesslog.split('\n'))
    errorlog = readlog("httpd/error.log")
    e = len(errorlog.split('\n'))
    data_b = 0
    ips = []
    files = []
    useragents = []
    errors = []
    notfound = []
    unprivileged = []
    for line in accesslog.split('\n'):
        fields = re.search('^(\S*) .*GET (\/.*) HTTP/\d\.\d\" 200 (\d*) \"(.*)\".*\((.*)\;', line)
        try:
            ips.append(fields.group(1))
            files.append(fields.group(2))
            useragents.append(fields.group(5))
            logger.debug("transferred " + fields.group(3) + " bytes in this request")
            data_b += int(fields.group(3))
            logger.debug("data_b is now " + str(data_b))
        except AttributeError as error:
            # non-matching line (non-GET, non-200, or malformed user agent)
            logger.debug("attributeerrror: " + str(error))
        except Exception as error:
            logger.warning("error processing httpd access log: " + str(error))
    logger.debug(str(data_b) + " bytes transferred")
    data_h = parsesize(data_b)
    writetitle("apache")
    logger.debug("httpd has transferred " + str(data_b) + " bytes in response to " + str(a) + " requests with " + str(e) + " errors")
    if (a > 0):
        logger.debug("found the following requests: " + str(files))
        files = addtag(files, 'code')
        files = orderbyfreq(files)
        files = truncl(files, CMDNO)
        writedata(str(a) + " requests", files)
    # fix: the next two conditions compared a list with None (always true)
    if ips:
        logger.debug("found the following ips: " + str(ips))
        ips = addtag(ips, 'code')
        ips = orderbyfreq(ips)
        n_ip = str(len(ips))
        ips = truncl(ips, CMDNO)
        writedata(n_ip + " unique clients", ips)
    if useragents:
        logger.debug("found the following useragents: " + str(useragents))
        useragents = addtag(useragents, 'code')
        useragents = orderbyfreq(useragents)
        n_ua = str(len(useragents))
        useragents = truncl(useragents, CMDNO)
        writedata(n_ua + " unique devices", useragents)
    writedata(data_h + " transferred")
    writedata(str(e) + " errors")
    closetag('div', 1)
    logger.info("finished httpd section")


#
#
#

def httpdsession():
    # DISABLED experiment: scrapes the apache mod_status page. Note the
    # closetag call below is unreachable after the return.
    # logger.debug("starting httpd section")
    opentag('div', 1, 'httpd', 'section')
    httpdlog = requests.get(HTTPDSTATUS).content
    uptime = re.search('.*uptime: (.*)<', httpdlog).group(1)
    uptime = re.sub(' minute[s]', 'm', uptime)
    uptime = re.sub(' second[s]', 's', uptime)
    uptime = re.sub(' day[s]', 's', uptime)
    uptime = re.sub(' month[s]', 'mo', uptime)
    accesses = re.search('.*accesses: (.*) - .*', httpdlog).group(1)
    traffic = re.search('.*Traffic: (.*)', httpdlog).group(1)
    return("httpd session: up " + uptime + ", " + accesses + " requests, " + traffic + " transferred")
    closetag('div', 1)
    # logger.info("finished httpd section")


#
#
#

def smbd():
    """Summarise samba logins across the per-client log files."""
    logger.debug("starting smbd section")
    opentag('div', 1, 'smbd', 'section')
    files = glob.glob(SMBDDIR + "/log.*[!\.gz][!\.old]")  # find list of logfiles
    logger.debug("found log files " + str(files))
    n_auths = 0        # total number of logins from all users
    sigma_auths = []   # one "user@host" entry per login
    output = ""
    for file in files:  # one log file for each client
        logger.debug("looking at file " + file)
        # find the machine (ip or hostname) that this file represents
        ip = re.search('log\.(.*)', file).group(1)  # from /var/log/samba/log.host
        host = resolve(ip)
        # count number of logins from each user
        matches = re.findall('.*sam authentication for user \[(.*)\] succeeded.*', readlog(file))
        for match in matches:
            userhost = match + "@" + host
            sigma_auths.append(userhost)
            n_auths += 1
    writetitle("samba")
    subtitle = plural("login", n_auths) + " from"
    if (len(sigma_auths) == 1):
        # single user: fold the name into the subtitle, skip the login count
        # fix: was sigma_auths[0][0], which is the first *character*
        subtitle += ' ' + sigma_auths[0]
        writedata(subtitle)
    else:  # multiple users
        sigma_auths = orderbyfreq(sigma_auths)
        sigma_auths = truncl(sigma_auths, CMDNO)
        logger.debug("found " + str(n_auths) + " samba logins for users " + str(sigma_auths))
        writedata(subtitle, sigma_auths)
    closetag('div', 1)
    logger.info("finished smbd section")


#
#
#

def postfix():
    """Summarise postfix message counts, recipients and total size."""
    logger.debug("starting postfix section")
    opentag('div', 1, 'postfix', 'section')
    messages = re.findall('.*from\=<(.*)>, size\=(\d*),.*\n.*to=<(.*)>', readlog('postfix'))
    recipients = []
    senders = []
    size = 0
    for message in messages:
        recipients.append(message[2])
        senders.append(message[0])
        size += int(message[1])
    size = parsesize(size)
    n = str(len(messages))
    writetitle("postfix")
    if (len(recipients) > 0):
        uniq_r = list(set(recipients))  # unique recipients
        if (len(uniq_r) > 1):
            recipients = orderbyfreq(recipients)
            recipients = truncl(recipients, CMDNO)
            writedata(n + " messages sent to", recipients)
        else:
            writedata(n + " messages sent to " + recipients[0])
    else:
        writedata(n + " messages sent")
    writedata("total of " + size)
    closetag('div', 1)
    logger.info("finished postfix section")


#
#
#

def zfs():
    """Summarise the latest zpool scrub and iostat figures from the zfs log."""
    logger.debug("starting zfs section")
    opentag('div', 1, 'zfs', 'section')
    zfslog = readlog('zfs')
    logger.debug("zfs log is " + zfslog)
    logger.debug("got zfs logfile\n" + zfslog + "---end log---")
    pool = re.search('.*---\n(\w*)', zfslog).group(1)
    scrub = re.search('.*scrub repaired (\d*) in \d*h\d*m with (\d*) errors on (\S*\s)(\S*)\s(\d+\s)', zfslog)
    iostat = re.search('.*---\n\w*\s*(\S*)\s*(\S*)\s', zfslog)
    scrubrepairs = scruberrors = scrubdate = None
    try:
        scrubrepairs = scrub.group(1)
        scruberrors = scrub.group(2)
        scrubdate = scrub.group(3) + scrub.group(5) + scrub.group(4)
    except Exception:
        # no scrub entry in the log yet; report usage only
        logger.debug("error getting scrub data")
    alloc = iostat.group(1)
    free = iostat.group(2)
    writetitle("zfs")
    if (scrubdate != None):
        subtitle = "Scrub of " + pool + " on " + scrubdate
        data = [scrubrepairs + " repaired", scruberrors + " errors", alloc + " used", free + " free"]
    else:
        subtitle = pool
        data = [alloc + " used", free + " free"]
    writedata(subtitle, data)
    closetag('div', 1)
    logger.info("finished zfs section")


#
#
#

def temp():
    """Report cpu core/package, system and hard-drive temperatures."""
    logger.debug("starting temp section")
    opentag('div', 1, 'temp', 'section')
    sensors.init()
    coretemps = []
    pkgtemp = 0
    systemp = 0
    try:
        for chip in sensors.iter_detected_chips():
            for feature in chip:
                if "Core" in feature.label:
                    coretemps.append([feature.label, feature.get_value()])
                    logger.debug("found core " + feature.label + " at temp " + str(feature.get_value()))
                if "CPUTIN" in feature.label:
                    pkgtemp = str(feature.get_value())
                    logger.debug("found cpu package at temperature " + pkgtemp)
                if "SYS" in feature.label:
                    systemp = feature.get_value()
                    logger.debug("found sys input " + feature.label + " at temp " + str(feature.get_value()))
        if coretemps:
            # fix: the original reduce() returned garbage past the first
            # pair (accumulator is a float, not a pair) - use a plain mean
            core_avg = sum(core[1] for core in coretemps) / len(coretemps)
            logger.debug("average cpu temp is " + str(core_avg))
            coretemps.append(["avg", str(core_avg)])
        coretemps.append(["pkg", pkgtemp])
        coretemps = [x[0] + ": " + str(x[1]) + DEG for x in coretemps]
    finally:
        sensors.cleanup()

    # For this to work, `hddtemp` must be running in daemon mode.
    # Start it like this (bash): sudo hddtemp -d /dev/sda /dev/sdX...
    s = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
    s.connect(('localhost', HDDTEMPPORT))
    output = s.recv(4096)
    output += s.recv(4096)
    s.close()
    hddtemps = []
    for drive in re.split('\|{2}', output):
        try:
            fields = re.search('\|*(/dev/sd.)\|.*\|(\d+)\|(.)', drive)
            name = fields.group(1)
            temp = float(fields.group(2))
            units = fields.group(3)
            hddtemps.append(drivetemp(name, temp, units))
        except Exception:
            # fragment did not describe a drive; skip it
            pass
    hddtotal = 0
    data = []
    for drive in hddtemps:
        data.append(drive.name + ': ' + str(drive.temp) + drive.units)
        logger.debug("found disk " + drive.name + " at " + str(drive.temp))
        hddtotal += drive.temp
    logger.debug("found " + str(len(hddtemps)) + " disks")
    logger.debug("sum of disk temps is " + str(hddtotal))
    if hddtemps:
        # guard against ZeroDivisionError when no drives answered
        hddavg = hddtotal / float(len(hddtemps))
        logger.debug("avg disk temp is " + str(hddavg))
        data.append("avg: " + str(hddavg))
    writetitle("temperatures")
    if (systemp != 0):
        writedata("sys: " + str(systemp) + DEG)
    # fix: these two conditions compared lists with '' (always true)
    if coretemps:
        writedata("cores", coretemps)
    if hddtemps:
        writedata("disks", data)
    closetag('div', 1)
    logger.info("finished temp section")


#
#
#

def du():
    """Report disk usage for each DUPATHS entry, with the delta since the
    previous run (persisted in the 'alloc' file)."""
    logger.debug("starting du section")
    opentag('div', 1, 'du', 'section')
    out = []
    content = readlog('alloc')
    contentnew = ""
    for p in DUPATHS:
        alloc_f = getusage(p).alloc
        delta = None
        try:
            alloc_i = re.search(p + '\t(.*)\n', content).group(1)
            delta = alloc_f - float(alloc_i)
        except Exception:
            # no previous record for this path (first run); no delta shown
            pass
        logger.debug("delta is " + str(delta))
        if (delta == None):
            out.append([p, "used " + parsesize(alloc_f)])
        else:
            out.append([p, "used " + parsesize(alloc_f), "delta " + parsesize(delta)])
        contentnew += (p + '\t' + str(alloc_f) + '\n')
    writelog('alloc', contentnew)
    writetitle("du")
    logger.debug("disk usage data is " + str(out))
    for path in out:
        writedata(path[0], [p for p in path[1:]])
    closetag('div', 1)
    logger.info("finished du section")


#
#
#

timenow = time.strftime("%H:%M:%S")
datenow = time.strftime("%x")

# map short log aliases (used in readlog/writelog) onto real paths
pathfilter = {"auth": AUTHPATH, "cron": CRONPATH, "sys": SYSPATH, "postfix": POSTFIXPATH, "smb": SMBDDIR, "zfs": ZFSPATH, "alloc": ALLOCPATH, "httpd": HTTPDDIR, "header": HEADERPATH}
# .items() instead of Py2-only .iteritems() so the script also runs on Py3
pathfilter = dict((re.escape(k), v) for k, v in pathfilter.items())
pathpattern = re.compile("|".join(pathfilter.keys()))

# template $variables$ substituted into the header and mail subject
varfilter = {"$title$": TITLE, "$date$": datenow, "$time$": timenow, "$hostname$": hostname(), "$version$": VERSION}
varfilter = dict((re.escape(k), v) for k, v in varfilter.items())
varpattern = re.compile("|".join(varfilter.keys()))

__main__()