logparse / timeparse.pyon commit rename parsers, better journald integration (e1f7605)
   1#!/usr/bin/env python
   2# -*- coding: utf-8 -*-
   3
   4'''
   5timeparse.py
   6(c) Will Roberts <wildwilhelm@gmail.com>  1 February, 2014
   7
Implements two functions for parsing various kinds of time
expressions: `timeparse`, which returns a `datetime.timedelta`, and
`strseconds`, which returns the equivalent number of seconds.
  10'''
  11
  12# MIT LICENSE
  13#
  14# Permission is hereby granted, free of charge, to any person
  15# obtaining a copy of this software and associated documentation files
  16# (the "Software"), to deal in the Software without restriction,
  17# including without limitation the rights to use, copy, modify, merge,
  18# publish, distribute, sublicense, and/or sell copies of the Software,
  19# and to permit persons to whom the Software is furnished to do so,
  20# subject to the following conditions:
  21#
  22# The above copyright notice and this permission notice shall be
  23# included in all copies or substantial portions of the Software.
  24#
  25# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
  26# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
  27# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
  28# NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
  29# BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
  30# ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
  31# CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
  32# SOFTWARE.
  33
  34import re
  35from datetime import timedelta
  36
  37SIGN        = r'(?P<sign>[+|-])?'
  38#YEARS      = r'(?P<years>\d+)\s*(?:ys?|yrs?.?|years?)'
  39#MONTHS     = r'(?P<months>\d+)\s*(?:mos?.?|mths?.?|months?)'
  40WEEKS       = r'(?P<weeks>[\d.]+)\s*(?:w|wks?|weeks?)'
  41DAYS        = r'(?P<days>[\d.]+)\s*(?:d|dys?|days?)'
  42HOURS       = r'(?P<hours>[\d.]+)\s*(?:h|hrs?|hours?)'
  43MINS        = r'(?P<mins>[\d.]+)\s*(?:m|(mins?)|(minutes?))'
  44SECS        = r'(?P<secs>[\d.]+)\s*(?:s|secs?|seconds?)'
  45SEPARATORS  = r'[,/]'
  46SECCLOCK    = r':(?P<secs>\d{2}(?:\.\d+)?)'
  47MINCLOCK    = r'(?P<mins>\d{1,2}):(?P<secs>\d{2}(?:\.\d+)?)'
  48HOURCLOCK   = r'(?P<hours>\d+):(?P<mins>\d{2}):(?P<secs>\d{2}(?:\.\d+)?)'
  49DAYCLOCK    = (r'(?P<days>\d+):(?P<hours>\d{2}):'
  50               r'(?P<mins>\d{2}):(?P<secs>\d{2}(?:\.\d+)?)')
  51
  52OPT         = lambda x: r'(?:{x})?'.format(x=x, SEPARATORS=SEPARATORS)
  53OPTSEP      = lambda x: r'(?:{x}\s*(?:{SEPARATORS}\s*)?)?'.format(
  54    x=x, SEPARATORS=SEPARATORS)
  55
  56TIMEFORMATS = [
  57    r'{WEEKS}\s*{DAYS}\s*{HOURS}\s*{MINS}\s*{SECS}'.format(
  58        #YEARS=OPTSEP(YEARS),
  59        #MONTHS=OPTSEP(MONTHS),
  60        WEEKS=OPTSEP(WEEKS),
  61        DAYS=OPTSEP(DAYS),
  62        HOURS=OPTSEP(HOURS),
  63        MINS=OPTSEP(MINS),
  64        SECS=OPT(SECS)),
  65    r'{MINCLOCK}'.format(
  66        MINCLOCK=MINCLOCK),
  67    r'{WEEKS}\s*{DAYS}\s*{HOURCLOCK}'.format(
  68        WEEKS=OPTSEP(WEEKS),
  69        DAYS=OPTSEP(DAYS),
  70        HOURCLOCK=HOURCLOCK),
  71    r'{DAYCLOCK}'.format(
  72        DAYCLOCK=DAYCLOCK),
  73    r'{SECCLOCK}'.format(
  74        SECCLOCK=SECCLOCK),
  75    #r'{YEARS}'.format(
  76        #YEARS=YEARS),
  77    #r'{MONTHS}'.format(
  78        #MONTHS=MONTHS),
  79    ]
  80
  81COMPILED_SIGN = re.compile(r'\s*' + SIGN + r'\s*(?P<unsigned>.*)$')
  82COMPILED_TIMEFORMATS = [re.compile(r'\s*' + timefmt + r'\s*$', re.I)
  83                        for timefmt in TIMEFORMATS]
  84
  85MULTIPLIERS = dict([
  86        #('years',  60 * 60 * 24 * 365),
  87        #('months', 60 * 60 * 24 * 30),
  88        ('weeks',   60 * 60 * 24 * 7),
  89        ('days',    60 * 60 * 24),
  90        ('hours',   60 * 60),
  91        ('mins',    60),
  92        ('secs',    1)
  93        ])
  94
  95def _interpret_as_minutes(sval, mdict):
  96    """
  97    Times like "1:22" are ambiguous; do they represent minutes and seconds
  98    or hours and minutes?  By default, timeparse assumes the latter.  Call
  99    this function after parsing out a dictionary to change that assumption.
 100    
 101    >>> import pprint
 102    >>> pprint.pprint(_interpret_as_minutes('1:24', {'secs': '24', 'mins': '1'}))
 103    {'hours': '1', 'mins': '24'}
 104    """
 105    if (    sval.count(':') == 1 
 106        and '.' not in sval
 107        and (('hours' not in mdict) or (mdict['hours'] is None))
 108        and (('days' not in mdict) or (mdict['days'] is None))
 109        and (('weeks' not in mdict) or (mdict['weeks'] is None))
 110        #and (('months' not in mdict) or (mdict['months'] is None))
 111        #and (('years' not in mdict) or (mdict['years'] is None))
 112        ):   
 113        mdict['hours'] = mdict['mins']
 114        mdict['mins'] = mdict['secs']
 115        mdict.pop('secs')
 116        pass
 117    return mdict
 118
 119def timeparse(sval, granularity='seconds'):
 120    '''
 121    Wrapper for the _strseconds function to convert the number of seconds to a
 122    datetime.timedelta object.
 123    '''
 124    return timedelta(seconds = strseconds(sval, granularity))
 125
 126
 127def strseconds(sval, granularity='seconds'):
 128    '''
 129    Parse a time expression, returning it as a timedelta.  If
 130    possible, the return value will be an `int`; if this is not
 131    possible, the return will be a `float`.  Returns `None` if a time
 132    expression cannot be parsed from the given string.
 133
 134    Arguments:
 135    - `sval`: the string value to parse
 136
 137    >>> timeparse('1:24')
 138    84
 139    >>> timeparse(':22')
 140    22
 141    >>> timeparse('1 minute, 24 secs')
 142    84
 143    >>> timeparse('1m24s')
 144    84
 145    >>> timeparse('1.2 minutes')
 146    72
 147    >>> timeparse('1.2 seconds')
 148    1.2
 149
 150    Time expressions can be signed.
 151
 152    >>> timeparse('- 1 minute')
 153    -60
 154    >>> timeparse('+ 1 minute')
 155    60
 156    
 157    If granularity is specified as ``minutes``, then ambiguous digits following
 158    a colon will be interpreted as minutes; otherwise they are considered seconds.
 159    
 160    >>> timeparse('1:30')
 161    90
 162    >>> timeparse('1:30', granularity='minutes')
 163    5400
 164    '''
 165    match = COMPILED_SIGN.match(sval)
 166    sign = -1 if match.groupdict()['sign'] == '-' else 1
 167    sval = match.groupdict()['unsigned']
 168    for timefmt in COMPILED_TIMEFORMATS:
 169        match = timefmt.match(sval)
 170        if match and match.group(0).strip():
 171            mdict = match.groupdict()
 172            if granularity == 'minutes':
 173                mdict = _interpret_as_minutes(sval, mdict)
 174            # if all of the fields are integer numbers
 175            if all(v.isdigit() for v in list(mdict.values()) if v):
 176                return sign * sum([MULTIPLIERS[k] * int(v, 10) for (k, v) in
 177                            list(mdict.items()) if v is not None])
 178            # if SECS is an integer number
 179            elif ('secs' not in mdict or
 180                  mdict['secs'] is None or
 181                  mdict['secs'].isdigit()):
 182                # we will return an integer
 183                return (
 184                    sign * int(sum([MULTIPLIERS[k] * float(v) for (k, v) in
 185                             list(mdict.items()) if k != 'secs' and v is not None])) +
 186                    (int(mdict['secs'], 10) if mdict['secs'] else 0))
 187            else:
 188                # SECS is a float, we will return a float
 189                return sign * sum([MULTIPLIERS[k] * float(v) for (k, v) in
 190                            list(mdict.items()) if v is not None])
 191