From 64409f010a719751dce97afe6c608dc8a91e6c6d Mon Sep 17 00:00:00 2001
From: Christian Pointner
Date: Sun, 12 Oct 2014 21:40:32 +0200
Subject: log line regex works

---
 src/daq/accesslog/sfive-accesslog.py | 57 +++++++++++++++++++++++++++++++++++-
 1 file changed, 56 insertions(+), 1 deletion(-)

diff --git a/src/daq/accesslog/sfive-accesslog.py b/src/daq/accesslog/sfive-accesslog.py
index 1337f1c..c8f4b40 100755
--- a/src/daq/accesslog/sfive-accesslog.py
+++ b/src/daq/accesslog/sfive-accesslog.py
@@ -36,6 +36,7 @@
 from twisted.internet import protocol, reactor, unix
 import socket
 from time import sleep
+import re
 
 import simplejson as json
 import datetime
@@ -104,6 +105,7 @@ class AccessLog():
         self._quality = properties['quality']
         self._tags = properties['tags']
         self._logfile = properties['logfile']
+        self._nameformat = properties['nameformat']
 
         self._proto = None
         self._conn = None
@@ -116,9 +118,18 @@ class AccessLog():
     def _initLog(self):
         try:
             self._fd = open(self._logfile, 'r')
+            regex = self._nameformat % { 'hostname': self._hostname,
+                                         'content-id': self._content_id,
+                                         'format': self._format,
+                                         'quality': self._quality }
+            self._file_re = re.compile(regex)
+            print "will be looking for files like '%s'" % regex
         except IOError as e:
             print 'SFive: error opening logfile: %s' % (e.strerror)
             return False
+        except re.error as e:
+            print 'SFive: regex error: %s' % (e)
+            return False
 
         return True
 
@@ -139,6 +150,7 @@ class AccessLog():
 
         cnt = self._sendLogData()
         print 'SFive: sent %d datasets' % (cnt)
+        self._fd.close()
         reactor.stop()
 
     def _sendInit(self):
@@ -147,9 +159,50 @@ class AccessLog():
                      "tags": self._tags }
         self._proto.sendDatagram('%s\n' % (json.dumps(initdata)));
 
+    def _prepareLineRegex(self):
+## 85.127.147.192 - - [24/Oct/2013:12:10:36 +0200] "GET /elevate-live/av-orig-hls-low.m3u8 HTTP/1.1" 200 315 "" ""
+## 91.119.202.141 - - [24/Oct/2013:12:10:36 +0200] "GET /elevate-live/av-orig-hls-low.m3u8 HTTP/1.1" 200 315 "-" ""
+## 91.119.202.141 - - [24/Oct/2013:12:10:36 +0200] "GET /elevate-live/av-orig-hls-low-10915.ts HTTP/1.1" 200 386340 "-" ""
+## 85.127.147.192 - - [24/Oct/2013:12:10:37 +0200] "GET /elevate-live/av-orig-hls-low.m3u8 HTTP/1.1" 200 315 "" ""
+## 85.127.147.192 - - [24/Oct/2013:12:10:37 +0200] "GET /elevate-live/av-orig-hls-low-10915.ts HTTP/1.1" 200 386340 "" ""
+        parts = [ r'(?P<client>\S+)', r'\S+', r'\S+', r'\[(?P<ts>.+)\]', r'"(?P<req>.+)"',
+                  r'(?P<status>[0-9]+)', r'(?P<size>\S+)', r'"(?P<ref>.*)"', r'"(?P<ua>.*)"']
+        return re.compile(r'\s+'.join(parts)+r'\s*\Z')
+
+    def _parseLine(self, regex, line):
+        linedata = regex.match(line).groupdict()
+
+        if linedata["client"] == "-":
+            linedata["client"] = None
+
+        linedata["status"] = int(linedata["status"])
+
+        if linedata["size"] == "-":
+            linedata["size"] = 0
+        else:
+            linedata["size"] = int(linedata["size"])
+
+        if linedata["ref"] == "-":
+            linedata["ref"] = None
+
+        return linedata
+
     def _sendLogData(self):
         cnt = 0
-        # TODO: create datasets using parser and call _sendDataset()
+        try:
+            regex = self._prepareLineRegex()
+            for line in self._fd:
+                linedata = self._parseLine(regex, line)
+                cnt += 1
+                print linedata
+
+                if cnt > 10:
+                    break
+                # TODO: create datasets using parser and call _sendDataset()
+
+        except re.error as e:
+            print 'SFive: regex error: %s' % (e)
+
         return cnt
 
     def _sendDataset(self, timestamp, duration, client_count, bytes_sent):
@@ -181,6 +234,8 @@ if __name__ == '__main__':
                         help='tag to be added to the statistic data, can be invoked several times')
     parser.add_argument('--logfile', '-l', dest='logfile', required=True,
                         help='path to the logfile')
+    parser.add_argument('--nameformat', '-F', dest='nameformat', required=True,
+                        help='the format for filenames which are part of this stream, this may include python string expressions')
     args = vars(parser.parse_args())
     if not args['tags']:
         args['tags'] = []
-- 
cgit v1.2.3