logparse / formatting.py on commit rename parsers, better journald integration (e1f7605)
   1# -*- coding: utf-8 -*-
   2
   3"""   
This file contains global functions for formatting and printing data. This file
should be imported into individual log-parsing scripts located in the default
logparse.parsers module or in the user-supplied parsers directory. Data is
formatted in HTML or plaintext. Writing to disk and/or emailing data is left to
interface.py.
   9"""
  10
  11import os
  12import re
  13import locale
  14from string import Template
  15from math import floor, ceil
  16from tabulate import tabulate
  17import textwrap
  18
  19import logparse
  20from logparse import interface, util, mail, config
  21
  22import logging
  23logger = None
  24logger = logging.getLogger(__name__)
  25
  26
  27locale.setlocale(locale.LC_ALL, '') # inherit system locale
  28
  29
  30DEG = u'\N{DEGREE SIGN}'
  31CEL = "C"
  32TIMEFMT = "%X"
  33DATEFMT = "%x"
  34CORNERCHARS_DOUBLE = ['╚', '╝', '╗', '╔']
  35CORNERCHARS_SINGLE = ['└', '┘', '┐', '┌']
  36LINECHARS_DOUBLE = ['║', '═']
  37LINECHARS_SINGLE = ['│', '─']
  38JXNCHARS_DOUBLE = ['╠', '╣', '╦', '╩', '╬']
  39JXNCHARS_SINGLE = ['├', '┤', '┬', '┴', '┼']
  40BULLET = "• "
  41INDENT = "  "
  42SPLIT_CHARS = ['.', '(', ')', '[', ']', '&', r"/", "\\", ',', '-', '_']
  43
  44
  45global VARSUBST
  46
  47
  48def init_var():
  49    """
  50    Initialise variable substitution templates (should be called before doing
  51    any substitutions)
  52    """
  53
  54    global VARSUBST
  55    css_path = config.prefs.get("html", "css")
  56    if config.prefs.getboolean("html", "css-relpath"):
  57        if interface.argparser.parse_args().no_write:
  58            css_path = os.path.relpath(css_path, ".")
  59        elif interface.argparser.parse_args().destination:
  60            css_path = os.path.relpath(
  61                    css_path, interface.argparser.parse_args().destination())
  62        elif config.prefs.get("logparse", "output"):
  63            css_path = os.path.relpath(
  64                    css_path, config.prefs.get("logparse", "output"))
  65    VARSUBST = {
  66        "title": config.prefs.get("logparse", "title"),
  67        "date": interface.start.strftime(DATEFMT),
  68        "time": interface.start.strftime(TIMEFMT),
  69        "hostname": util.hostname(config.prefs.get(
  70            "logparse", "hostname-path")),
  71        "version": logparse.__version__,
  72        "css": css_path,
  73        "period": util.LogPeriod("logparse").startdate.strftime(
  74            TIMEFMT + " " + DATEFMT)
  75    }
  76
  77
  78class Output:
  79    """
  80    Base class for a data processor. 
  81    """
  82    
  83    def __init__(self):
  84        self.content = ""
  85        self.destination = ""
  86
  87    def append(self, content):
  88        """
  89        Add a string
  90        """
  91
  92        self.content += content
  93
  94    def write(self, destination=""):
  95        """
  96        Output contents into a file
  97        """
  98
  99        if destination == "":
 100            destination = self.destination
 101        if destination == "":
 102            logger.warning("No destination path provided")
 103            return 1
 104        with open(destination, 'w') as f:
 105            f.write(self.content)
 106            logger.info("Written output to {}".format(destination))
 107
 108    def print_stdout(self, lines=False):
 109        """
 110        Echo the contents to the console
 111        """
 112
 113        print()
 114        if lines:
 115            line = PlaintextLine(linewidth=
 116                    config.prefs.getint("plain", "linewidth"), double=True)
 117            print(line.draw())
 118        print(self.content)
 119        if lines:
 120            print(line.draw())
 121        print()
 122
 123
 124class PlaintextOutput(Output):
 125    """
 126    Processes & outputs data in a plaintext form which can be read with cat or
 127    plaintext email.
 128    """
 129
 130    def __init__(self, linewidth=80):
 131        self.content = ""
 132        self.destination = ""
 133        self.linewidth = linewidth;
 134
 135    def append_header(self, template=''):
 136        """
 137        Print details with some primitive formatting
 138        """
 139        box = PlaintextBox(content=
 140                Template("$title $version on $hostname\n\n$time $date"
 141                    "\nParsing logs since $period")
 142                .safe_substitute(VARSUBST),
 143                vpadding=2, hpadding="\t\t", linewidth=self.linewidth)
 144        self.append(box.draw() + "\n"*2)
 145
 146    def append_footer(self):
 147        """
 148        Append a horizontal line and some details
 149        """
 150        self.append(PlaintextLine(self.linewidth).draw())
 151        self.append(Template("$hostname $time $date").safe_substitute(VARSUBST))
 152
 153    def append_section(self, section):
 154        """
 155        Call the appropriate methods to format a section (provided by a parser).
 156        This should be run by interface.py after every instance of parse_log().
 157        """
 158
 159        if section == None:
 160            logger.warning("Received null section")
 161            return
 162        self.append(PlaintextBox(
 163            content=section.title, double=False,
 164            fullwidth=False, vpadding=0, hpadding=" ").draw())
 165        if section.period and section.period.unique:
 166            self.append("\n(since {0})".format(
 167                section.period.startdate.strftime(DATEFMT + " " + TIMEFMT)))
 168        self.append('\n'*2)
 169        for data in section.data:
 170            self.append(self._fmt_data(data.subtitle, data.items))
 171            self.append('\n')
 172        for table in section.tables:
 173            self.append(table.draw_plain())
 174        self.append("\n")
 175
 176    def _fmt_data(self, subtitle, data = None):   # write title and data
 177        """
 178        Format the properties of a data object into usable plaintext form with
 179        a few fancy symbols/formatting tricks. Subtitle is required, data is
 180        not. If only subtitle is supplied or subtitle + one data item, a single
 181        line will be printed.
 182        """
 183
 184        if (subtitle == ""):
 185            logger.warning("No subtitle provided.. skipping section")
 186            return
 187
 188        logger.debug("Processing data {}".format(subtitle))
 189
 190        if (data == None or len(data) == 0):
 191            # If no list items are provided, just print the subtitle
 192            return subtitle + "\n"
 193        elif (len(data) == 1):
 194            # If only one item is provided, print it inline with subtitle
 195            return self._wrap_datum("{}: {}".format(subtitle, data[0]),
 196                    bullet=False, indent=False) + "\n"
 197        else:
 198            # If many items are provided, print them all as a bulleted list
 199            itemoutput = subtitle + ":\n"
 200            for datum in data:
 201                itemoutput += self._wrap_datum(datum) + "\n"
 202            return itemoutput
 203
 204    def _wrap_datum(self, text, bullet=True, indent=True):
 205        """
 206        Use cpython's textwrap module to limit line width to the value 
 207        specified in self.linewidth. This is much easier than doing it all
 208        from scratch (which I tried to do originally). Note that line 
 209        continuations are automatically indented even if they don't have a 
 210        bullet. This is to make it clear which lines are continuations.
 211        """
 212
 213        wrapper = textwrap.TextWrapper(
 214                initial_indent=(INDENT if indent else "") \
 215                        + (BULLET if bullet else ""),
 216                subsequent_indent=INDENT + (' '*len(BULLET) if bullet else ""),
 217                width=self.linewidth,
 218                replace_whitespace=True)
 219
 220        return wrapper.fill(text)
 221
 222
 223class HtmlOutput(Output):
 224    """
 225    Process and output data in HTML format. All HTML formatting functions now
 226    reside in this class to differentiate them from plain text.
 227    """
 228
 229    def __init__(self):
 230        """
 231        Initialise variables (no parameters required for initialisation)
 232        """
 233
 234        self.content = ""
 235        self.destination = ""
 236        self.css = ""
 237        self._embedded = ""
 238
 239    def embed_css(self, css):
 240        """
 241        Convert stylesheet to inline tags
 242        """
 243
 244        if not self._embedded:
 245            self._embedded = mail.mailprep(re.sub(
 246                ".*" + re.escape(VARSUBST['css']) + ".*\n", "", self.content),
 247                css)
 248        return self._embedded
 249
 250    def write_embedded(self, destination = ""):
 251        """
 252        Write contents to file with inline CSS tags
 253        """
 254
 255        logger.debug("Writing HTML with embedded styles to " + destination)
 256        if not self._embedded:
 257            logger.warning("Call to write_embedded before embed_css - \
 258                    embedding stylesheets immediately")
 259            self.embed_css(config.prefs.get("html", "css"))
 260        if destination == "":
 261            destination = self.destination
 262        if destination == "":
 263            logger.warning("No destination path provided")
 264            return 1
 265        with open(destination, 'w') as f:
 266            f.write(self._embedded)
 267            logger.info("Written output to {}".format(destination))
 268
 269
 270    def append_header(self, template):
 271        """
 272        Insert variables into header template file and append HTML tags
 273        """
 274
 275        self.headertemplate = template
 276        headercontent = Template(open(template, 'r').read())
 277        self.append(headercontent.safe_substitute(VARSUBST))
 278        self.append(opentag('div', id='main'))
 279
 280    def append_footer(self):
 281        """
 282        Close HTML tags that were opened in the template.
 283        TODO: add footer template similar to header template.
 284        """
 285
 286        self.append(closetag('div') + closetag('body') + closetag('html'))
 287
 288    def append_section(self, section):
 289        """
 290        Call the appropriate methods to generate HTML tags for a section
 291        (provided by a parser). This should be run by interface.py after every
 292        instance of parse_log().
 293        """
 294
 295        if section == None:
 296            logger.warning("Received null section")
 297            return
 298        self.append(opentag('div', 1, section.title, 'section'))
 299        self.append(self._gen_title(section.title))
 300        if section.period and section.period.unique:
 301            self.append(self._fmt_period(section.period))
 302        for data in section.data:
 303            self.append(self._fmt_data(data.subtitle, data.items, data.severity))
 304        for table in section.tables:
 305            self.append(table.draw_html())
 306        self.append(closetag('div', 1))
 307
 308    def _gen_title(self, title):
 309        """
 310        Format the title for a section
 311        """
 312
 313        if (title == '' or '\n' in title):
 314            logger.error("Invalid title")
 315            raise ValueError 
 316        logger.debug("Writing title for " + title)
 317        return tag('h2', False, title)
 318
 319    def _fmt_data(self, subtitle, data=None, severity=0):
 320        """
 321        Format the properties of a data object into usable HTML tags.
 322        Subtitle is required, data is not. If only subtitle is supplied or
 323        subtitle + one data item, a single line will be printed.
 324        """
 325
 326        if (subtitle == ""):
 327            logger.warning("No subtitle provided.. skipping section")
 328            return
 329
 330        if (data == None or len(data) == 0):
 331            logger.debug("No data provided.. just printing subtitle")
 332            return tag('p', False, subtitle, cl="severity-" + str(severity))
 333        else:
 334            logger.debug("Received data {}: {}".format(subtitle, data))
 335            subtitle += ':'
 336            if (len(data) == 1):
 337                return tag('p', False, subtitle + ' ' + data[0],
 338                        cl="severity-" + str(severity))
 339            else:
 340                output = ""
 341                output += tag('p', False, subtitle,
 342                        cl="severity-" + str(severity))
 343                output += opentag('ul', 1)
 344                coderegex = re.compile('`(.*)`')
 345                for datum in data:
 346                    if datum == "" or datum == None:
 347                        continue
 348                    datum = coderegex.sub(r"<code>\1</code>", str(datum))
 349                    output += tag('li', False, datum)
 350                output += closetag('ul', True)
 351                return output
 352
 353    def _fmt_period(self, period):
 354        output = ''
 355        output += opentag('span', cl='period')
 356        output += "since " + period.startdate.strftime(DATEFMT + " " + TIMEFMT)
 357        output += closetag('span')
 358        return output
 359
 360    def print_stdout_embedded(self, lines=False):
 361        """
 362        Echo the version with embedded style tags to the console
 363        """
 364
 365        if self._embedded == "":
 366            self.embed_css(config.prefs.get("html", "css"))
 367        print()
 368        if lines:
 369            line = PlaintextLine(linewidth=
 370                    config.prefs.getint("plain", "linewidth"), double=True)
 371            print(line.draw())
 372        print(self._embedded)
 373        if lines:
 374            print(line.draw())
 375        print()
 376
 377
 378class Section:
 379    """
 380    Each parser should output a Section() which contains the title, returned
 381    data, and applicable time period.
 382    """
 383
 384    def __init__(self, title, period=None):
 385        self.title = title
 386        self.data = []
 387        self.tables = []
 388        self.period = util.LogPeriod(self.title)
 389
 390    def append_data(self, data):
 391        self.data.append(data)
 392
 393    def append_table(self, table):
 394        self.tables.append(table)
 395
 396
 397class Data:
 398    """
 399    Each section (parser) can have one or more Data() objects which are
 400    essentially glorified lists with titles (`self.subtitle`).
 401    """
 402    
 403    def __init__(self, subtitle="", items=[], severity=0):
 404        """
 405        Initialise variables. No parameters are enforced upon initialisation,
 406        but at least the subtitle is required for valid output. Severity refers
 407        to the importance of the data (integer from 0 to 5). e.g. a failed
 408        system should have severity 5 and will be formatted appropriately by
 409        the Output object.
 410        """
 411
 412        self.subtitle = subtitle
 413        self.items = items 
 414        self.severity = severity
 415
 416    def truncl(self, limit):      # truncate list
 417        """
 418        Truncate self.items to a specified value and state how many items are
 419        hidden. Set limit to -1 to avoid truncating any items.
 420        """
 421
 422        if limit == -1:
 423            return self
 424        if (len(self.items) > limit):
 425            more = len(self.items) - limit
 426            if more == 1:
 427                return 0
 428            self.items = self.items[:limit]
 429            self.items.append("+ {0} more".format(str(more)))
 430        return self
 431
 432    def orderbyfreq(self):
 433        """
 434        Order a list by frequency of each item, then remove duplicates and
 435        append frequency in parentheses.
 436        """
 437
 438        unsorted = list(self.items)
 439        self.items = ["{0} ({1})".format(y, unsorted.count(y)) for y in sorted(
 440            set(unsorted), key = lambda x: -unsorted.count(x))]
 441        return self
 442
 443
 444class Table(object):
 445    """
 446    A wrapper for python-tabulate's Tabulate type.
 447    """
 448    
 449    def __init__(self, double=False, borders=False, hpadding=" ",
 450            maxwidth=80, headers=[]):
 451        """
 452        Initialise variables. Note the keymap is used for a faster index map,
 453        but is not currently used anywhere (may be removed in future).
 454        """
 455
 456        self.rows =  []     # List of row objects
 457        self.keymap = {}    # For fast lookup of row by value of first column 
 458        self.double = double
 459        self.borders = borders
 460        self.align_cols = []
 461        self.hpadding = hpadding
 462        self.maxwidth = maxwidth
 463        self.headers = headers
 464        self._align_cols = []
 465
 466    def add_row(self, row):
 467        """
 468        Append a row to the list and amend index mapping
 469        """
 470
 471        self.rows.append(row)
 472        if len(row.columns) > 0:
 473            self.keymap[row.columns[0]] = row
 474
 475        logger.debug("Added row with {0} columns".format(str(len(row.columns))))
 476
 477    def align_column(self, i, align):
 478        """
 479        Set alignment for the 'i'th column (`align` should be 'l', 'c' or 'r')
 480        """
 481
 482        while len(self._align_cols) -1 < i:
 483            self._align_cols.append("")
 484        self._align_cols[i] = align
 485        for row in self.rows:
 486            row.columns[i].align = align
 487        logger.debug("Column alignment is now {0}".format(self._align_cols))
 488
 489    def _gen_list(self):
 490        """
 491        Used locally for organising rows and columns into a 2D list structure
 492        """
 493
 494        hierarchy = []
 495        for row in self.rows:
 496            row_data = []
 497            for column in row.columns:
 498                row_data.append(column.content)
 499            hierarchy.append(row_data)
 500        return hierarchy
 501
 502    def draw_html(self):
 503        """
 504        Output HTML string (wrapper for tabulate)
 505        """
 506
 507        output = tabulate(self._gen_list(), self.headers, tablefmt="html",
 508                colalign=tuple(self._align_cols))
 509        return output
 510
 511    def draw_plain(self):
 512        """
 513        Output plain text string (wrapper for tabulate)
 514        """
 515
 516        output = tabulate(self._gen_list(), self.headers,
 517                tablefmt="fancy_grid" if self.borders
 518                else "plain", colalign=tuple(self._align_cols))
 519        return output + "\n"*2
 520
 521
 522class Row(object):
 523    """
 524    Object representing a literal row in a 2D table with the individual cells
 525    in the row represented by columns[].
 526    """
 527    
 528    def __init__(self, columns=[], header=False):
 529        """
 530        Initialise variables. The variable n is used locally to keep track of
 531        the row width.
 532        """
 533
 534        self.columns = columns
 535        self.header = header
 536        self.n = len(self.columns)
 537
 538    def add_column(self, column):
 539        """
 540        Append a single cell horizontally and increment the cell count
 541        """
 542
 543        self.columns.append(column)
 544        self.n += 1
 545
 546    def rm_column(self, column):
 547        """
 548        Remove the specified column object and decrement the cell count
 549        """
 550
 551        self.remove(column)
 552        self.n -= 1
 553
 554
 555class Column(object):
 556    """
 557    Object representing a single table cell. "Column" is somewhat of a misnomer 
 558    - one column object exists for each cell in the table. Columns are children
 559    of rows.
 560    """
 561
 562    def __init__(self, content="", align="right"):
 563        """
 564        Initialise variables. The align property sets the alignment of a single
 565        cell ('l', 'c', or 'r').
 566        """
 567
 568        self.content = content
 569        self.align = align
 570
 571
 572class PlaintextLine:
 573    """
 574    Draw a horizontal line for plain text format, with optional padding/styling.
 575    """
 576
 577    def __init__(self, linewidth=80, double=True, vpadding=0, hpadding=""):
 578        """
 579        Initialise variables
 580        """
 581
 582        self.linewidth = linewidth
 583        self.double = double
 584        self.vpadding = vpadding
 585        self.hpadding = hpadding
 586
 587    def draw(self):
 588        """
 589        Output a plain text string based on the current object parameters
 590        """
 591
 592        line = (LINECHARS_DOUBLE[1] if self.double else LINECHARS_SINGLE[1])
 593        return "\n" * self.vpadding + self.hpadding \
 594                +  line * (self.linewidth - 2 * len(self.hpadding)) \
 595                + self.hpadding + "\n" * (self.vpadding + 1)
 596
 597
 598class PlaintextBox:
 599    """
 600    Draw a rectangular box around text, with customisable padding/size/style
 601    """
 602
 603    def __init__(self, content="", double=True, fullwidth=True, linewidth=80,
 604            hpadding="\t", vpadding=1):
 605        """
 606        Initialise variables
 607        """
 608        self.content = content
 609        self.fullwidth = fullwidth
 610        self.linewidth = linewidth
 611        self.hpadding = hpadding 
 612        self.vpadding = vpadding
 613        self.double = double
 614
 615    def draw(self):
 616        """
 617        Output a plain text string based on the current object parameters. This
 618        involves calculating the text width, breaking text at the maximum line
 619        length, and then drawing a box around it all.
 620        """
 621
 622        if self.double == True:
 623            cornerchars = CORNERCHARS_DOUBLE
 624            linechars = LINECHARS_DOUBLE
 625        else:
 626            cornerchars = CORNERCHARS_SINGLE
 627            linechars = LINECHARS_SINGLE
 628
 629        # Check hpadding has a definite width
 630        self.hpadding = self.hpadding.replace("\t", " "*4)
 631
 632        # Calculate number of characters per line
 633        contentlines = self.content.splitlines()
 634        contentwidth = int((self.linewidth if self.linewidth > 0 else 80)
 635                if self.content.splitlines()
 636                else len(max(contentlines, key=len)))
 637        logger.debug("Content width is {0}".format(str(contentwidth)))
 638        logger.debug("Longest line is {0}".format(
 639            len(max(contentlines, key=len))))
 640        contentwidth += -2*(len(self.hpadding)+1)
 641        if not self.fullwidth:
 642            longestline = len(max(contentlines, key=len))
 643            if longestline <= self.linewidth - 2*(len(self.hpadding)+1):
 644                contentwidth = longestline
 645
 646        # Split lines that are too long
 647        for i, line in enumerate(contentlines):
 648            if len(line) > contentwidth:
 649                words = line.split()
 650                if max(map(len, words)) > contentwidth:
 651                    continue
 652                res, part, others = [], words[0], words[1:]
 653                for word in others:
 654                    if len(' ') + len(word) > contentwidth - len(part):
 655                        res.append(part)
 656                        part = word
 657                    else:
 658                        part += ' ' + word
 659                if part:
 660                    res.append(part)
 661                contentlines[i] = res
 662
 663        # Flatten list
 664        #   Note list comprehension doesn't work here, so we must iterate
 665        #   through each item
 666        newlines = []
 667        for line in contentlines:
 668            if isinstance(line, list):
 669                for subline in line:
 670                    newlines.append(subline)
 671            else:
 672                newlines.append(line)
 673        contentlines = newlines
 674               
 675        # Add vertical padding
 676        for _ in range(self.vpadding):
 677            contentlines.insert(0, ' '*contentwidth)
 678            contentlines.append(' '*contentwidth)
 679
 680        # Insert horizontal padding on lines that are too short
 681        contentlines = [linechars[0] + self.hpadding + x
 682                + ' '*(self.linewidth-(len(x)+2*len(self.hpadding)+2)
 683                    if len(x) < contentwidth else 0)
 684                + self.hpadding + linechars[0] for x in contentlines]
 685        contentlines.insert(0, cornerchars[3] + linechars[1] 
 686                * (contentwidth + len(self.hpadding)*2) + cornerchars[2])
 687        contentlines.append(cornerchars[0] + linechars[1]
 688                * (contentwidth + len(self.hpadding)*2) + cornerchars[1])
 689        return ('\n').join(contentlines)
 690
 691
 692def backticks(l):
 693    """
 694    Surround every item in a list by backticks. Used for showing code in both
 695    HTML and plain text formats (converted to <code> tags for HTML)
 696    """
 697
 698    return ["`" + x + "`" for x in l]
 699
 700
 701def plural(noun, quantity, print_quantity=True):
 702    """
 703    Return "1 noun" or "n nouns"
 704    """
 705
 706    if (quantity == 1):
 707        if print_quantity:
 708            return(str(quantity) + " " + noun)
 709        else:
 710            return noun
 711    else:
 712        if noun.endswith("s"):
 713            noun += "e"
 714        if print_quantity:
 715            return(str(quantity) + " " + noun + "s")
 716        else:
 717            return noun + "s"
 718
 719
 720def parsesize(num, suffix='B'):
 721    """
 722    Return human-readable size from number of bytes
 723    """
 724
 725    for unit in ['','Ki','Mi','Gi','Ti','Pi','Ei','Zi']:
 726        if abs(num) < 1024.0:
 727            return "%3.1f %s%s" % (num, unit, suffix)
 728        num /= 1024.0
 729    return "%.1f%s%s" % (num, 'Yi', suffix)
 730
 731
 732def fsubject(subject):
 733    """
 734    Replace variables in the title template provided in config
 735    """
 736
 737    r = Template(subject).safe_substitute(VARSUBST)
 738    logger.debug("Returning subject line " + r)
 739    return r
 740
 741
 742def opentag(tag, block=False, id=None, cl=None, style=None):
 743    """
 744    Write HTML opening tag
 745    """
 746
 747    output = ""
 748    if block:
 749        output += '\n'
 750    output += '<' + tag
 751    if id:
 752        output += " id='" + id + "'"
 753    if cl:
 754        output += " class='" + cl + "'"
 755    if style:
 756        output += " style='"
 757        output += " ".join("{0}: {1};".format(attr, value)
 758                for attr, value in style.items())
 759        output += "'"
 760    output += '>'
 761    if block:
 762        output += '\n'
 763    return output
 764
 765
 766def closetag(tag, block=False):
 767    """
 768    Write HTML closing tag
 769    """
 770
 771    if block:
 772        return "\n</" + tag + ">\n"
 773    else:
 774        return "</" + tag + ">"
 775
 776
 777def tag(tag, block=False, content="", id=None, cl=None, style=None):
 778    """
 779    Write HTML opening tag, content, and closing tag
 780    """
 781
 782    o = opentag(tag, block, id, cl, style)
 783    c = closetag(tag, block)
 784    return o + content + c