diff options
author | Christian Pointner <equinox@spreadspace.org> | 2014-10-12 22:25:57 +0200 |
---|---|---|
committer | Christian Pointner <equinox@spreadspace.org> | 2014-10-12 22:25:57 +0200 |
commit | 6b7f81934add1d58e46e18478fe0e163761a6564 (patch) | |
tree | bef3bdd6a9bdf6ff085d8b370b09c85eb2a7c511 /src | |
parent | log line regex works (diff) |
parsing data from log lines works now
Diffstat (limited to 'src')
-rwxr-xr-x | src/daq/accesslog/sfive-accesslog.py | 37 |
1 files changed, 26 insertions, 11 deletions
```diff
diff --git a/src/daq/accesslog/sfive-accesslog.py b/src/daq/accesslog/sfive-accesslog.py
index c8f4b40..94a0309 100755
--- a/src/daq/accesslog/sfive-accesslog.py
+++ b/src/daq/accesslog/sfive-accesslog.py
@@ -39,6 +39,7 @@ from time import sleep
 import re
 import simplejson as json
 import datetime
+import dateutil.parser
 
 _MAX_PACKET_SIZE = 8192 # in bytes
 
@@ -160,20 +161,34 @@ class AccessLog():
         self._proto.sendDatagram('%s\n' % (json.dumps(initdata)));
 
     def _prepareLineRegex(self):
-## 85.127.147.192 - - [24/Oct/2013:12:10:36 +0200] "GET /elevate-live/av-orig-hls-low.m3u8 HTTP/1.1" 200 315 "<referrer>" "<UA>"
-## 91.119.202.141 - - [24/Oct/2013:12:10:36 +0200] "GET /elevate-live/av-orig-hls-low.m3u8 HTTP/1.1" 200 315 "-" "<UA>"
-## 91.119.202.141 - - [24/Oct/2013:12:10:36 +0200] "GET /elevate-live/av-orig-hls-low-10915.ts HTTP/1.1" 200 386340 "-" "<UA>"
-## 85.127.147.192 - - [24/Oct/2013:12:10:37 +0200] "GET /elevate-live/av-orig-hls-low.m3u8 HTTP/1.1" 200 315 "<referer>" "<UA>"
-## 85.127.147.192 - - [24/Oct/2013:12:10:37 +0200] "GET /elevate-live/av-orig-hls-low-10915.ts HTTP/1.1" 200 386340 "<referer>" "<UA>"
         parts = [ r'(?P<client>\S+)', r'\S+', r'\S+', r'\[(?P<ts>.+)\]', r'"(?P<req>.+)"',
                   r'(?P<status>[0-9]+)', r'(?P<size>\S+)', r'"(?P<ref>.*)"', r'"(?P<ua>.*)"']
         return re.compile(r'\s+'.join(parts)+r'\s*\Z')
 
+    def _parseRequest(self, reqstr):
+        req = { 'cmd': None, 'url': None, 'proto': None }
+        try:
+            parts = reqstr.split()
+            req['cmd'] = parts[0] if parts[0] != '-' else None
+            req['url'] = parts[1]
+            req['proto'] = parts[2]
+        except IndexError:
+            pass
+
+        return req
+
+    def _parseDatetime(self, datetimestr):
+        try:
+            return dateutil.parser.parse(datetimestr[:11] + " " + datetimestr[12:], dayfirst=True)
+        except ValueError as e:
+            return None
+
     def _parseLine(self, regex, line):
         linedata = regex.match(line).groupdict()
 
-        if linedata["client"] == "-":
-            linedata["client"] = None
+        for part in ("client", "ref", "ua"):
+            if linedata[part] == "-":
+                linedata[part] = None
 
         linedata["status"] = int(linedata["status"])
 
@@ -182,9 +197,8 @@ class AccessLog():
         else:
             linedata["size"] = int(linedata["size"])
 
-        if linedata["ref"] == "-":
-            linedata["ref"] = None
-
+        linedata['req'] = self._parseRequest(linedata['req'])
+        linedata['ts'] = self._parseDatetime(linedata['ts'])
         return linedata
 
     def _sendLogData(self):
@@ -195,8 +209,9 @@ class AccessLog():
             linedata = self._parseLine(regex, line)
             cnt += 1
             print linedata
+            print
 
-            if cnt > 10:
+            if cnt >= 10:
                 break
 
         # TODO: create datasets using parser and call _sendDataset()
```