logparse / load_parsers.py on commit rename parsers, better journald integration (e1f7605)
   1# -*- coding: utf-8 -*-
   2
   3"""
   4A basic "plugin loader" implementation which searches for default packaged and
   5user-supplied parser modules and verifies them so they can be executed by
   6logparse.interface. The requirements for parser modules and classes are
   7specified in the docstring of the Parser class.
   8
   9Classes in this module:
  10    - `Parser`:         Base class that every parser should inherit
  11    - `ParserLoader`:   Class used internally by `interface.py` to load parsers
  12"""
  13
  14import importlib
  15from importlib import util
  16from os.path import dirname
  17from pkgutil import iter_modules
  18import inspect
  19from pathlib import Path
  20import subprocess
  21from subprocess import Popen, PIPE
  22from typing import get_type_hints
  23
  24import logging
# Module-level logger, named after this module; used by ParserLoader and by
# the base Parser class for their diagnostic messages
logger = logging.getLogger(__name__)


# Default directory that ParserLoader searches for user-supplied parser modules
PARSER_DIR = "/usr/share/logparse/user-parsers"
# Default dotted name of the package that holds the bundled parsers
PARSER_PKG = "logparse.parsers"
  30
  31
  32class Parser():
  33    """
  34    This is the base class that every parser should inherit. Parsers should
  35    each exist in their own module and contain a Parser class whose name is the
  36    same as the parser (e.g. `example.py` contains class `Example(Parser)`).
  37    Each parser module must contain exactly one Parser class definition, and
  38    this class cannot be a redefinition of the base Parser class (i.e. this
  39    class). This class must provide the parse_log() method which returns a
  40    logparse.formatting.Section object.
  41    """
  42
  43    def __init__(self, name=None, path=None, info=None, deprecated=False,
  44            successor=""):
  45        """
  46        The following variables can be set to display information about the
  47        parser. The object `self.logger` can be used for outputting messages to
  48        to whatever sink is set up in __init__.py (no setup required in the
  49        parser module itself).
  50        """
  51        self.name = str(name) if name else None
  52        self.path = Path(path) if path else None
  53        self.info = dict(info) if info else None
  54        self.logger = logging.getLogger(self.__module__)
  55        self.deprecated = deprecated
  56        self.successor = successor
  57
  58    def load(self):
  59        """
  60        A generic loading method to import a parser, only used for debugging
  61        """
  62        logger.debug("Loading parser {0} from {1}".format(
  63            self.name, str(self.path) if self.path != None else "defaults"))
  64        return importlib.import_module(self.name)
  65
  66    def parse_log(self, **args) -> None:
  67        """
  68        Every parser should provide the parse_log method which is executed at
  69        runtime to analyse logs. Verification checks should prevent the below
  70        exception from ever being raised.
  71        """
  72        raise NotImplementedError("Failed to find an entry point for parser")
  73
  74    def check_dependencies(self) -> tuple:
  75        """
  76        Parsers should check their own requirements here and return a boolean 
  77        value reflecting whether the parser can run successfully. Typically 
  78        this method should check for the program whose logs are being parsed, 
  79        as well as any external dependencies like libsystemd. This method 
  80        should return a tuple containing a boolean representing whether or not 
  81        the dependencies are satisfied and list containing the names of any 
  82        dependencies that are unsatisfied.
  83        """
  84        return (True, None) 
  85
  86    def _check_dependency_command(self, cmdline) -> tuple:
  87        """
  88        Runs a shell command (typically something --version) and returns the 
  89        output and return code as a tuple. The command to run is passed as a 
  90        string, optionally including arguments, in the `cmdline` argument.
  91        """
  92        logger.debug("Checking output of command " + cmdline)
  93        cmd = subprocess.getstatusoutput(cmdline)
  94        if cmd[0] != 0:
  95            logger.warning("{0} is not available on this system (`{1}` "
  96                    "returned code {2}: \"{3}\")".format(
  97                    cmdline.split()[0], cmdline, *cmd))
  98            return cmd
  99        else:
 100            logger.debug("Command {0} succeeded".format(cmdline))
 101            return cmd
 102
 103
 104
 105class ParserLoader:
 106    """
 107    This class searches for parsers in the main logparse package
 108    (logparser.parsers) and optionally in another external package (default
 109    /usr/share/logparse).
 110    """
 111
 112    def __init__(self, pkg=PARSER_PKG, path=PARSER_DIR):
 113        """
 114        The pkg and path attributes shouldn't need to be set on object
 115        creation, the default values should work fine. They are hard-coded here
 116        for security so that a module can't force-load a module from another
 117        package/location, e.g. from the internet.
 118        """
 119
 120        self.pkg = pkg
 121        self.path = path
 122        self.parsers = []
 123        self.has_systemd = False
 124
 125    def search(self, pattern):
 126        """
 127        Find a parser and determine its journald attribute. When a user 
 128        requests a parser of the form .*_journald, this function will use 
 129        that parser if it exists, but if not it will revert to using the 
 130        base parser (without the _journald) if it has a journald attribute. 
 131        If it does not have this error (and the parser as requested does not
 132        exist), then no parser is loaded..
 133        """
 134        # Separate into underscore words
 135        split_name = pattern.split("_")     
 136
 137        # Check if parser exists with exact name requested by user
 138        result = self._search_both(pattern)
 139
 140        if result == None and split_name[-1] == "journald":
 141            # No match for exact name but .*_journald was requested...
 142            if self.has_systemd:
 143                # Look for base parser with journald attribute
 144                result = self._search_both("".join(split_name[:-1]))
 145                if result == None:
 146                    logger.error("Couldn't find a matching parser module "
 147                        "for {0}".format(pattern))
 148                if not hasattr(result, "journald"):
 149                    logger.error("Found parser {} but it does not support "
 150                            "journald".format("".join(split_name[:-1])))
 151                    result = None
 152                else:
 153                    result.journald = True
 154            else:
 155                logger.error("A parser that requires systemd was requested "
 156                        "but the dependencies are not installed.")
 157                return None
 158
 159        if not result.deps_ok:
 160            return None
 161
 162        if result == None:
 163            # Still can't find a matching parser
 164            logger.error("Couldn't find a matching parser module "
 165                "for {0}".format(pattern))
 166        else:
 167            self.parsers.append(result)
 168
 169        return result
 170
 171    def _search_both(self, pattern):
 172        """
 173        Basic wrapper for the two search functions below.
 174        """
 175        default_parser = self._search_default(pattern)
 176        if default_parser != None:
 177            return default_parser
 178        else:
 179            user_parser = self._search_user(pattern)
 180            if user_parser != None:
 181                return user_parser
 182            else:
 183                return None
 184
 185    def _search_user(self, pattern):
 186        """
 187        Search for a parser name `pattern` in the user-managed parser directory
 188        """
 189
 190        logger.debug("Searching for {0} in {1}".format(pattern, self.path))
 191        try:
 192            spec = importlib.machinery.PathFinder.find_spec(
 193                    pattern, path=[self.path])
 194            parser_module = spec.loader.load_module(spec.name)
 195            return self._validate_module(parser_module)
 196        except Exception as e:
 197            return None
 198
 199    def _search_default(self, pattern):
 200        """
 201        Search for a parser name `pattern` in the default parser package
 202        TODO use importlib.resources.is_resources() once there is a backport
 203        to Python 3.6 or below
 204        """
 205        
 206        logger.debug("Searching for {0} in default parsers".format(pattern))
 207        try:
 208            parser_module = importlib.import_module(self.pkg + "." + pattern)
 209            return self._validate_module(parser_module)
 210        except Exception as e:
 211            return None 
 212
 213    def _validate_module(self, parser_module):
 214        """
 215        Some basic security tests for candidate modules:
 216            1. Must contain exactly one Parser object
 217            3. This class cannot be a redefinition of the base Parser class
 218            4. Must provide the parse_log() method
 219            5. Must not return None
 220            6. Must not match an already-loaded class
 221            7. Dependencies must exist
 222        """
 223
 224        logger.debug("Checking validity of module {0} at {1}".format(
 225            parser_module.__name__, parser_module.__file__))
 226        available_parsers = []
 227        missing_dependencies = []
 228        clsmembers = inspect.getmembers(parser_module, inspect.isclass)
 229
 230        # Check individual classes
 231        for (_, c) in clsmembers:
 232            if not (issubclass(c, Parser) & (c is not Parser)):
 233                continue
 234            if c in self.parsers:
 235                logger.error("Parser class {0} has already been loaded "
 236                    "from another source, ignoring it".format(
 237                        c.__class__.__name__, c.__file__))
 238            if not inspect.isroutine(c.parse_log):
 239                logger.error("Parser class {0} in {1} does not contain a "
 240                    "parse_log() method".format(
 241                        c.__class__.__name__, c.__file__))
 242                continue
 243            if None in get_type_hints(c):
 244                logger.error("Parser class {0} in {1} contains a "
 245                    "null-returning parse_log() method".format(
 246                        c.__class__.__name__, c.__file__))
 247                continue
 248            parser_obj = c()
 249            if parser_obj.deprecated:
 250                logger.warning("Parser {0} is deprecated - "
 251                    "use {1} instead".format(
 252                        parser_obj.name, parser_obj.successor))
 253            # Check dependencies
 254            deps = parser_obj.check_dependencies()
 255            if deps[0]:
 256                parser_obj.deps_ok = True
 257            else:
 258                logger.error("The following dependencies are missing for "
 259                        "parser {0}: {1}".format(parser_obj.name,
 260                            ", ".join(deps[1])))
 261                missing_dependencies.append(parser_obj)
 262                parser_obj.deps_ok = False
 263
 264            logger.debug("Found parser {0}.{1}".format(
 265                c.__module__, c.__class__.__name__))
 266            available_parsers.append(parser_obj)
 267
 268        # Check module structure
 269        if len(available_parsers) > 1:
 270            logger.error("Found multiple valid parser classes in {0} at {1} "
 271                "- ignoring this module"
 272                .format(parser_module.__name__, parser_module.__file__))
 273            return None
 274        elif len(available_parsers) == 0:
 275            if len(missing_dependencies) > 0:
 276                return None
 277            logger.error("No valid classes in {0} at {1}".
 278                    format(parser_module.__name__, parser_module.__file__))
 279            return None
 280        if len(available_parsers) == 1:
 281            logger.debug("Parser module {0} at {1} passed validity checks"
 282                    .format(parser_module.__name__, parser_module.__file__))
 283            return available_parsers[0]
 284
 285    def check_systemd(self):
 286        """
 287        Check if the appropriate dependencies are installed for parsing 
 288        systemd logs.
 289
 290        Output codes:
 291            0.    systemd + libsystemd + systemd-python are installed
 292            1.    systemd + libsystemd are installed
 293            2.    systemd is installed
 294            3.    systemd is not installed, no support required
 295        """
 296        # Test if systemctl works
 297        systemctl_cmd = Popen(["systemctl", "--version"], stdout=PIPE)
 298        systemctl_cmd.communicate()
 299
 300        if systemctl_cmd.returncode == 0:
 301            logger.debug("Passed systemctl test")
 302
 303            # Test if libsystemd exists
 304            libsystemd_cmd = Popen(["locate", "libsystemd.so.0"], stdout=PIPE)
 305            libsystemd_cmd.communicate()
 306
 307            if libsystemd_cmd.returncode == 0:
 308                logger.debug("Passed libsystemd test")
 309
 310                # Test if systemd-python exists
 311                if util.find_spec("systemd") is not None:
 312                    logger.debug("Passed systemd-python test")
 313                    self.has_systemd = True
 314                    logger.debug("Passed all systemd dependency checks")
 315                    return 0
 316                else:
 317                    logger.warning("Systemd is running on this system but the "
 318                            "package systemd-python is not installed. Parsers "
 319                            "that use journald will not work. For more "
 320                            "features, install systemd-python from "
 321                            "<https://pypi.org/project/systemd-python/> or "
 322                            "`pip install systemd-python`.")
 323                    return 1
 324            else:
 325                logger.warning("Systemd is running on this system but "
 326                        "libsystemd headers are missing. This package is "
 327                        "required to make use of the journald parsers. "
 328                        "Libsystemd should be available with your package "
 329                        "manager of choice.")
 330                return 2
 331        else:
 332            logger.debug("Systemd not installed.. parsers that use journald "
 333                    "will not work.")
 334            return 3
 335
 336
 337    def load_pkg(self):
 338        """
 339        Clear the list of currently loaded packages and load all valid and
 340        non-deprecated parser classes from self.pkg using importlib.
 341        """
 342
 343        available_parsers = [name for _, name, _ in iter_modules(
 344            [dirname(importlib.import_module(self.pkg).__file__)])]
 345        for parser_name in available_parsers:
 346            parser_module = importlib.import_module(
 347                    "logparse.parsers." + parser_name)
 348            parser_class = self._validate_module(parser_module)
 349            if parser_class == None:
 350                continue
 351            if parser_class.deprecated:
 352                logger.debug("Ignoring parser {0} because it is deprecated"
 353                        .format(parser_class.__class__.__name__))
 354                continue
 355            self.parsers.append(parser_class)
 356        return self.parsers
 357
 358    def ignore(self, pattern):
 359        """
 360        Remove a parser from the list of currently loaded parsers
 361        """
 362
 363        for parser in self.parsers:
 364            if parser.__module__ == pattern:
 365                self.parsers.remove(parser)
 366                logger.debug("Ignoring parser {0}".format(parser.__name__))
 367        return self.parsers