#! /usr/bin/python
# logparse.py, initial commit (059f3b7)

import argparse, logging, os, shutil, re, subprocess, sys, requests, glob, socket, sensors, datetime, time, operator
from sys import stdin
from collections import namedtuple, defaultdict

diskstat = namedtuple('diskstat', ['cap', 'alloc', 'free', 'ratio'])
drivetemp = namedtuple('drivetemp', ['name', 'temp', 'units'])


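# Configuration: where to find each service's logs, where to write the report,
# and how many items to show in the truncated lists below.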
AUTHPATH = "/var/log/auth.log"
CRONPATH = "/var/log/cron.log"
SYSPATH = "/var/log/syslog"
SMBDDIR = "/var/log/samba"
ZFSPATH = "/var/log/zpool.log"
ALLOCPATH = "/tmp/alloc"
POSTFIXPATH = "/var/log/mail.log"
HTTPDSTATUS = "http://localhost/server-status"
HTTPDDIR = "/var/log/apache2"
HOSTNAMEPATH = "/etc/hostname"
DUPATHS = ["/home/andrew", "/mnt/andrew"]
HDDTEMPS = ["/dev/sda", "/dev/sdc", "/dev/sdd", "/dev/sde"]
HDDTEMPPORT = 7634
SUMMARYPATH = "/mnt/andrew/temp/logparse-test.html"
HEADERPATH = "header.html"
STYLEPATH = "main.css"
OUTPUT = ""
TITLE = "logparse"
MAXLIST = 10
CMDNO = 3
MAILSUBJECT = "logparse from $hostname$"
VERSION = "v0.1"

# Set up logging
logging.basicConfig(level=logging.DEBUG)
logger = logging.getLogger('logparse')

# Get arguments
parser = argparse.ArgumentParser(description='grab logs of some common services and send them by email')
parser.add_argument('-t','--to', help='mail recipient (\"to\" address)',required=False)
to = parser.parse_args().to
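# Example invocation (the address is illustrative):
#   ./logparse.py --to admin@example.com
# __main__ builds the HTML report at SUMMARYPATH section by section, then
# emails it if a recipient was given.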

def __main__():
    logger.info("Beginning log analysis at " + str(timenow))
    if (to == None):
        logger.info("no recipient address provided, outputting to stdout")
    else:
        logger.info("email will be sent to " + to)

    global tempfile
    tempfile = open(SUMMARYPATH, 'w+')
    tempfile.write(header(HEADERPATH))
    opentag('div', 1, 'main')
    sshd()
    sudo()
    cron()
    nameget()
    httpd()
    smbd()
    postfix()
    zfs()
    temp()
    du()
    for tag in ['div', 'body', 'html']:
        closetag(tag, 1)
    tempfile.close()
    if (to != None):
        logger.debug("sending email")
        subprocess.call("cat " + SUMMARYPATH + " | mail -a 'Content-type: text/html' -s '" + subject(MAILSUBJECT) + "' " + to, shell=True)
        logger.info("sent email")


def writetitle(title):
    if (title == '' or '\n' in title):
        logger.error("invalid title")
        return
    logger.debug("writing title for " + title)
    tag('h2', 0, title)

def writedata(subtitle, data = None):   # write title and data to tempfile
    if (subtitle == ""):
        logger.warning("no subtitle provided.. skipping section")
        return

    tag('p', 0, subtitle)
    if (data == None):
        logger.warning("no data provided.. just printing subtitle")
    else:
        logger.debug("received data " + str(data))
        opentag('ul', 1)
        for datum in data:
            logger.debug("printing datum " + datum)
            tag('li', 0, datum)
        closetag('ul', 1)

def opentag(tag, block = 0, id = None, cl = None):   # write html opening tag
    if (block == 1):
        tempfile.write('\n')
    tempfile.write('<' + tag)
    if (id != None):
        tempfile.write(" id='" + id + "'")
    if (cl != None):
        tempfile.write(" class='" + cl + "'")
    tempfile.write('>')
    if (block == 1):
        tempfile.write('\n')

def closetag(tag, block = 0):  # write html closing tag
    if (block == 0):
        tempfile.write("</" + tag + ">")
    else:
        tempfile.write("\n</" + tag + ">\n")

def tag(tag, block = 0, content = ""):  # write html opening tag, content, and html closing tag
    opentag(tag, block)
    tempfile.write(content)
    closetag(tag, block)

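# header() and subject() fill in the $title$, $date$, $time$, $hostname$ and
# $version$ tokens using the varfilter map defined at the bottom of this file.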
def header(template):   # return a parsed html header from file
    headercontent = open(template, 'r').read()
    headercontent = varpattern.sub(lambda m: varfilter[re.escape(m.group(0))], headercontent)
    return headercontent

def subject(template):
    return varpattern.sub(lambda m: varfilter[re.escape(m.group(0))], template)

def hostname(): # get the hostname
    hnfile = open(HOSTNAMEPATH, 'r')
    return hnfile.read().strip()

def resolve(ip):        # try to resolve an ip to hostname
    logger.debug("trying to resolve ip " + ip)
    try:
        socket.inet_aton(ip)  # succeeds if text contains ip
        hn = socket.gethostbyaddr(ip)[0].split(".")[0] # resolve ip to hostname
        logger.debug("found hostname " + hn)
        return(hn)
    except:
        logger.warning("failed to resolve hostname for " + ip)
        return(ip)  # return ip if no hostname exists

def plural(noun, quantity): # return "1 noun" or "n nouns"
    if (quantity == 1):
        return(str(quantity) + " " + noun)
    else:
        return(str(quantity) + " " + noun + "s")

def parsesize(num, suffix='B'):     # return human-readable size from number of bytes
    for unit in ['','Ki','Mi','Gi','Ti','Pi','Ei','Zi']:
        if abs(num) < 1024.0:
            return "%3.1f %s%s" % (num, unit, suffix)
        num /= 1024.0
    return "%.1f%s%s" % (num, 'Yi', suffix)

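# readlog() and writelog() accept either a real path or one of the short names
# defined in pathfilter at the bottom of the file (e.g. 'auth', 'cron', 'sys').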
def readlog(path = None, mode = 'r'):   # read file, substituting known paths
    if (path == None):
        logger.error("no path provided")
        return
    else:
        path = pathpattern.sub(lambda m: pathfilter[re.escape(m.group(0))], path)
        return open(path, mode).read()

def writelog(path = None, content = "", mode = 'w'):   # write file, substituting known paths
    if (path == None or content == None):
        logger.error("invalid usage of writelog")
        return
    else:
        path = pathpattern.sub(lambda m: pathfilter[re.escape(m.group(0))], path)
        file = open(path, mode)
        file.write(content)
        file.close()

def getusage(path):     # Get disk usage statistics
    disk = os.statvfs(path)
    cap = float(disk.f_bsize*disk.f_blocks)                     # disk capacity
    alloc = float(disk.f_bsize*(disk.f_blocks-disk.f_bfree))    # size of path
    free = float(disk.f_bsize*disk.f_bfree)                     # free space on disk (blocks, not usable space)
    ratio = alloc / cap * 100                                   # percentage used
    return diskstat(cap, alloc, free, ratio)

def orderbyfreq(l):     # order a list by the frequency of its elements and remove duplicates
    temp_l = l[:]
    l = list(set(l))
    l.sort(key=lambda x:temp_l.count(x))
    return l

def addtag(l, tag):  # add prefix and suffix tags to each item in a list
    l2 = ['<' + tag + '>' + i + '</' + tag + '>' for i in l]
    return l2

def truncl(input, limit):      # truncate list
    if (len(input) > limit):
        more = str(len(input) - limit)
        output = input[-limit:]
        output.append("+ " + more + " more")
        return(output)
    else:
        return(input)

#
#
#

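# Each section below opens a <div>, parses one service's log with regular
# expressions, and writes a title plus a (possibly truncated) list of results.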
def sshd():
    logger.debug("starting sshd section")
    opentag('div', 1, 'sshd', 'section')
    matches = re.findall('.*sshd.*Accepted publickey for .* from .*', readlog('auth'))    # get all logins
    users = []  # list of users with format [username, number of logins] for each item
    data = []
    num = sum(1 for x in matches)     # total number of logins
    for match in matches:
        entry = re.search('^.*publickey\sfor\s(\w*)\sfrom\s(\S*)', match)  # [('user', 'ip')]

        user = entry.group(1)
        ip = entry.group(2)

        userhost = user + '@' + resolve(ip)
        exists = [i for i, item in enumerate(users) if re.search(userhost, item[0])]
        if (exists == []):
            users.append([userhost, 1])
        else:
            users[exists[0]][1] += 1

    writetitle('sshd')
    subtitle = plural('login', num) + ' from'
    if (len(users) == 1):             # if only one user, do not display no of logins for this user
        logger.debug("found " + str(len(matches)) + " ssh logins for user " + users[0][0])
        subtitle += ' ' + users[0][0]
        writedata(subtitle)
    else:
        subtitle += ':'
        for user in users:
            data.append(user[0] + ' (' + str(user[1]) + ')')
            if len(data) > MAXLIST:     # if there are lots of users, truncate them
                data.append('+ ' + str(len(users) - MAXLIST - 1) + " more")
                break
        logger.debug("found " + str(len(matches)) + " ssh logins for users " + str(data))
        writedata(subtitle, data)
    closetag('div', 1)
    logger.info("finished sshd section")

#
#
#

def sudo():
    logger.debug("starting sudo section")
    opentag('div', 1, 'sudo', 'section')
    umatches = re.findall('.*sudo:session\): session opened.*', readlog('auth'))
    num = sum(1 for line in umatches)    # total number of sessions
    users = []
    data = []
    for match in umatches:
        user = re.search('.*session opened for user root by (\S*)\(uid=.*\)', match).group(1)
        exists = [i for i, item in enumerate(users) if re.search(user, item[0])]
        if (exists == []):
            users.append([user, 1])
        else:
            users[exists[0]][1] += 1
    commands = []
    cmatches = re.findall('sudo:.*COMMAND\=(.*)', readlog('auth'))
    for cmd in cmatches:
        commands.append(cmd)
    logger.debug("found the following commands: " + str(commands))
    # temp_cmd=commands[:]
    # commands = list(set(commands))
    # commands.sort(key=lambda x:temp_cmd.count(x))
    commands = orderbyfreq(commands)
    logger.debug("top 3 sudo commands: " + str(commands[-3:]))

    writetitle("sudo")
    subtitle = plural("sudo session", num) + " for"
    if (len(users) == 1):
        logger.debug("found " + str(num) + " sudo session(s) for user " + str(users[0]))
        subtitle += ' ' + users[0][0]
        writedata(subtitle)
    else:
        subtitle += ':'
        for user in users:
            data.append(user[0] + ' (' + str(user[1]) + ')')
            if len(data) > 3:
                data.append('+ ' + str(len(users) - 4) + " more")
                break
        logger.debug("found " + str(num) + " sudo sessions for users " + str(data))
        writedata(subtitle, data)
    if (len(commands) > 0):
        commands = addtag(commands, 'code')
        commands = truncl(commands, CMDNO)
        writedata("top sudo commands", [c for c in commands])
    closetag('div', 1)
    logger.info("finished sudo section")

#
#
#

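# Note: the cron regex below skips any CMD entry whose command contains "cd"
# (the negative lookahead), so jobs of that form are not counted.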
def cron():
    logger.debug("starting cron section")
    opentag('div', 1, 'cron', 'section')
    matches = re.findall('.*CMD\s*\(\s*(?!.*cd)(.*)\)', readlog('cron'))
    num = sum(1 for line in matches)
    commands = []
    for match in matches:
        commands.append(str(match))
    # commands.append([str(match)for match in matches])
    logger.debug("found cron commands " + str(commands))
    logger.info("found " + str(num) + " cron jobs")
    subtitle = str(num) + " cron jobs run"
    writetitle("cron")
    writedata(subtitle)
    if (num > 0):
        commands = orderbyfreq(commands)
        commands = addtag(commands, 'code')
        commands = truncl(commands, CMDNO)
        writedata("top cron commands", [c for c in commands])
    closetag('div', 1)
    logger.info("finished cron section")

#
#
#

def nameget():
    logger.debug("starting nameget section")
    opentag('div', 1, 'nameget', 'section')
    syslog = readlog('sys')
    failed = re.findall('.*nameget.*downloading of (.*) from .*failed.*', syslog)
    n_f = sum(1 for i in failed)
    l_f = []
    for i in failed:
        l_f.append(i)
    logger.debug("the following downloads failed: " + str(l_f))
    succ = re.findall('.*nameget.*downloaded.*', syslog)
    n_s = sum(1 for i in succ)
    l_s = []
    for i in succ:
        l_s.append(i)
    logger.debug("the following downloads succeeded: " + str(l_s))
    logger.debug("found " + str(n_s) + " successful downloads, and " + str(n_f) + " failed attempts")
    writetitle("nameget")
    writedata(str(n_s) + " succeeded", truncl(orderbyfreq(l_s), CMDNO))
    writedata(str(n_f) + " failed", truncl(orderbyfreq(l_f), CMDNO))
    closetag('div', 1)
    logger.info("finished nameget section")

#
#
#

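# Only "200" responses contribute to the byte count below; every line in the
# access log counts as a request regardless of status code.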
def httpd():
    logger.info("starting httpd section")
    opentag('div', 1, 'httpd', 'section')
    accesslog = readlog("httpd/access.log")
    a = len(accesslog.splitlines())     # number of requests (lines in access log)
    errorlog = readlog("httpd/error.log")
    e = len(errorlog.splitlines())      # number of errors (lines in error log)
    data_b = 0

    for line in accesslog.split('\n'):
        try:
            data_b += int(re.search('.*HTTP/\d\.\d\" 200 (\d*) ', line).group(1))
        except Exception as error:
            if type(error) is AttributeError:
                pass
            else:
                logger.warning("error processing httpd access log: " + str(error))
    data_h = parsesize(data_b)

    logger.debug("httpd has transferred " + str(data_b) + " bytes in response to " + str(a) + " requests with " + str(e) + " errors")

    writetitle("apache")
    writedata(data_h + " transferred")
    writedata(str(a) + " requests")
    writedata(str(e) + " errors")

    closetag('div', 1)
    logger.info("finished httpd section")

#
#
#

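# httpdsession() scrapes the Apache mod_status page at HTTPDSTATUS. It is not
# called from __main__ at the moment and returns its summary as a string
# rather than writing it into the report.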
def httpdsession():
    # logger.debug("starting httpd section")
    opentag('div', 1, 'httpd', 'section')
    httpdlog = requests.get(HTTPDSTATUS).content
    uptime = re.search('.*uptime: (.*)<', httpdlog).group(1)
    uptime = re.sub(' minute[s]', 'm', uptime)
    uptime = re.sub(' second[s]', 's', uptime)
    uptime = re.sub(' day[s]', 'd', uptime)
    uptime = re.sub(' month[s]', 'mo', uptime)
    accesses = re.search('.*accesses: (.*) - .*', httpdlog).group(1)
    traffic = re.search('.*Traffic: (.*)', httpdlog).group(1)
    return("<br /><strong>httpd session: </strong> up " + uptime + ", " + accesses + " requests, " + traffic + " transferred")
    closetag('div', 1)
    # logger.info("finished httpd section")

#
#
#

def smbd():
    logger.debug("starting smbd section")
    opentag('div', 1, 'smbd', 'section')
    files = glob.glob(SMBDDIR + "/log.*[!\.gz][!\.old]")    # find list of logfiles
    n_auths = 0         # total number of logins from all users
    sigma_auths = []    # contains users and their respective no. of logins
    output = ""

    for file in files:  # one log file for each client

        # find the machine (ip or hostname) that this file represents
        ip = re.search('log\.(.*)', file).group(1)    # get ip or hostname from file path (/var/log/samba/log.host)
        host = resolve(ip)

        # count number of logins from each user
        matches = re.findall('.*sam authentication for user \[(.*)\] succeeded.*', readlog(file))
        for match in matches:
            userhost = match + "@" + host
            exists = [i for i, item in enumerate(sigma_auths) if re.search(userhost, item[0])]
            if (exists == []):
                sigma_auths.append([userhost, 1])
            else:
                sigma_auths[exists[0]][1] += 1
            n_auths += 1
    writetitle("samba")
    subtitle = plural("login", n_auths) + " from"
    data = []
    if (len(sigma_auths) == 1):             # if only one user, do not display no of logins for this user
        subtitle += ' ' + sigma_auths[0][0]
        writedata(subtitle)
    else:       # multiple users
        subtitle += ':'
        for x in sigma_auths:
            data.append((str(x[0])) + " (" + str(x[1]) + ")")
            if len(data) > MAXLIST:      # if many users, truncate them
                data.append('+ ' + str(len(sigma_auths) - MAXLIST - 1) + " more")
                break
        logger.debug("found " + str(n_auths) + " samba logins for users " + str(sigma_auths))
        writedata(subtitle, data)
    closetag('div', 1)
    logger.info("finished smbd section")

#
#
#

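# postfix() counts messages by pairing each "from=<...>, size=" entry in
# mail.log with the ": removed" line that follows it, and sums the reported
# sizes.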
def postfix():
    logger.debug("starting postfix section")
    opentag('div', 1, 'postfix', 'section')
    messages = re.findall('.*from\=<.*>, size\=(\d*),.*\n.*\n.*\: removed\n.*', readlog('postfix'))
    size = sum([int(x) for x in messages])
    size = parsesize(size)
    n = str(len(messages))
    writetitle("postfix")
    writedata(n + " messages sent")
    writedata("total of " + size)
    closetag('div', 1)
    logger.info("finished postfix section")

#
#
#

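# zfs() expects ZFSPATH to contain the scrub summary and iostat-style table for
# a single pool, presumably dumped there by a scheduled zpool job.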
def zfs():
    logger.debug("starting zfs section")
    opentag('div', 1, 'zfs', 'section')
    zfslog = readlog('zfs')
    pool = re.search('.*---\n(\w*)', zfslog).group(1)
    scrub = re.search('.*scrub repaired (\d*) in \d*h\d*m with (\d*) errors on (\S*\s)(\S*)\s(\d+\s)', zfslog)
    iostat = re.search('.*---\n\w*\s*(\S*)\s*(\S*)\s', zfslog)
    scrubrepairs = scrub.group(1)
    scruberrors = scrub.group(2)
    scrubdate = scrub.group(3) + scrub.group(5) + scrub.group(4)
    alloc = iostat.group(1)
    free = iostat.group(2)
    writetitle("zfs")
    subtitle = "Scrub on " + scrubdate + ": "
    data = [scrubrepairs + " repaired", scruberrors + " errors", alloc + " used", free + " free"]
    writedata(subtitle, data)
    closetag('div', 1)
    logger.info("finished zfs section")

#
#
#

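# temp() relies on the lm-sensors Python bindings (the sensors module imported
# above) for CPU/system temperatures and on the hddtemp daemon for drives.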
def temp():
    logger.debug("starting temp section")
    opentag('div', 1, 'temp', 'section')
    sensors.init()
    coretemps = []
    pkgtemp = 0
    systemp = 0
    try:
        for chip in sensors.iter_detected_chips():
            for feature in chip:
                if "Core" in feature.label:
                    coretemps.append([feature.label, feature.get_value()])
                    logger.debug("found core " + feature.label + " at temp " + str(feature.get_value()))
                if "CPUTIN" in feature.label:
                    pkgtemp = str(feature.get_value())
                    logger.debug("found cpu package at temperature " + pkgtemp)
                if "SYS" in feature.label:
                    systemp = feature.get_value()
                    logger.debug("found sys input " + feature.label + " at temp " + str(feature.get_value()))
        core_avg = sum(x[1] for x in coretemps) / len(coretemps)
        logger.debug("average cpu temp is " + str(core_avg))
        coretemps.append(["avg", str(core_avg)])
        coretemps.append(["pkg", pkgtemp])
        coretemps = [x[0] + ": " + str(x[1]) + '&#8451;' for x in coretemps]
    finally:
        sensors.cleanup()

    # For this to work, `hddtemp` must be running in daemon mode.
    # Start it like this (bash):   sudo hddtemp -d /dev/sda /dev/sdX...
    s = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
    s.connect(('localhost',HDDTEMPPORT))
    output = s.recv(4096)
    output += s.recv(4096)
    s.close()
    hddtemps = []
    for drive in re.split('\|{2}', output):
        try:
            fields = re.search('\|*(/dev/sd.)\|.*\|(\d+)\|(.)', drive)
            name = fields.group(1)
            temp = float(fields.group(2))
            units = fields.group(3)
            hddtemps.append(drivetemp(name, temp, units))
        except:
            pass
    hddtotal = 0
    data = []
    for drive in hddtemps:
        data.append(drive.name + ': ' + str(drive.temp) + drive.units)
        logger.debug("found disk " + drive.name + " at " + str(drive.temp))
        hddtotal += drive.temp
    logger.debug("found " + str(len(hddtemps)) + " disks")
    logger.debug("sum of disk temps is " + str(hddtotal))
    hddavg = hddtotal/float(len(hddtemps))
    logger.debug("avg disk temp is " + str(hddavg))
    data.append("avg: " + str(hddavg))
    writetitle("temperatures")
    if (systemp != 0):
        writedata("sys: " + str(systemp) + '&#8451;')
    if (coretemps != []):
        writedata("cores", coretemps)
    if (hddtemps != []):
        writedata("disks", data)

    closetag('div', 1)
    logger.info("finished temp section")

#
#
#

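# du() records the current allocation of each path in ALLOCPATH so that the
# next run can report the change ("delta") since the previous report.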
def du():
    logger.debug("starting du section")
    opentag('div', 1, 'du', 'section')
    out = []
    content = readlog('alloc')
    contentnew = ""
    for p in DUPATHS:
        alloc_f = getusage(p).alloc
        delta = None
        try:
            alloc_i = re.search(p + '\t(.*)\n', content).group(1)
            delta = alloc_f - float(alloc_i)
        except:
            pass
        logger.debug("delta is " + str(delta))
        if (delta == None):
            out.append([p, "used " + parsesize(alloc_f)])
        else:
            out.append([p, "used " + parsesize(alloc_f), "delta " + parsesize(delta)])
        contentnew += (p + '\t' + str(alloc_f) + '\n')
    writelog('alloc', contentnew)

    writetitle("du")
    logger.debug("disk usage data is " + str(out))
    for path in out:
        writedata(path[0], [p for p in path[1:]])

    closetag('div', 1)
    logger.info("finished du section")

#
#
#

timenow = time.strftime("%H:%M:%S")
datenow = time.strftime("%x")

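# Substitution tables: pathfilter maps the short log names used with readlog()
# and writelog() to real paths; varfilter maps the $...$ tokens used in
# header.html and MAILSUBJECT to their values.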
pathfilter = {"auth": AUTHPATH, "cron": CRONPATH, "sys": SYSPATH, "postfix": POSTFIXPATH, "smb": SMBDDIR, "zfs": ZFSPATH, "alloc": ALLOCPATH, "httpd": HTTPDDIR, "header": HEADERPATH}
pathfilter = dict((re.escape(k), v) for k, v in pathfilter.iteritems())
pathpattern = re.compile("|".join(pathfilter.keys()))

varfilter = {"$title$": TITLE, "$date$": datenow, "$time$": timenow, "$hostname$": hostname(), "$version$": VERSION}
varfilter = dict((re.escape(k), v) for k, v in varfilter.iteritems())
varpattern = re.compile("|".join(varfilter.keys()))


__main__()