summaryrefslogtreecommitdiff
path: root/src/daq
diff options
context:
space:
mode:
authorChristian Pointner <equinox@spreadspace.org>2014-10-12 21:40:32 +0200
committerChristian Pointner <equinox@spreadspace.org>2014-10-12 21:40:32 +0200
commit64409f010a719751dce97afe6c608dc8a91e6c6d (patch)
treec398419c9a875e7a54c34e94b826d8e5b6b4647e /src/daq
parentimproved error handling (diff)
log line regex works
Diffstat (limited to 'src/daq')
-rwxr-xr-xsrc/daq/accesslog/sfive-accesslog.py57
1 files changed, 56 insertions, 1 deletions
diff --git a/src/daq/accesslog/sfive-accesslog.py b/src/daq/accesslog/sfive-accesslog.py
index 1337f1c..c8f4b40 100755
--- a/src/daq/accesslog/sfive-accesslog.py
+++ b/src/daq/accesslog/sfive-accesslog.py
@@ -36,6 +36,7 @@ from twisted.internet import protocol, reactor, unix
import socket
from time import sleep
+import re
import simplejson as json
import datetime
@@ -104,6 +105,7 @@ class AccessLog():
self._quality = properties['quality']
self._tags = properties['tags']
self._logfile = properties['logfile']
+ self._nameformat = properties['nameformat']
self._proto = None
self._conn = None
@@ -116,9 +118,18 @@ class AccessLog():
def _initLog(self):
try:
self._fd = open(self._logfile, 'r')
+ regex = self._nameformat % { 'hostname': self._hostname,
+ 'content-id': self._content_id,
+ 'format': self._format,
+ 'quality': self._quality }
+ self._file_re = re.compile(regex)
+ print "will be looking for files like '%s'" % regex
except IOError as e:
print 'SFive: error opening logfile: %s' % (e.strerror)
return False
+ except re.error as e:
+ print 'SFive: regex error: %s' % (e)
+ return False
return True
@@ -139,6 +150,7 @@ class AccessLog():
cnt = self._sendLogData()
print 'SFive: sent %d datasets' % (cnt)
+ self._fd.close()
reactor.stop()
def _sendInit(self):
@@ -147,9 +159,50 @@ class AccessLog():
"tags": self._tags }
self._proto.sendDatagram('%s\n' % (json.dumps(initdata)));
+ def _prepareLineRegex(self):
+## 85.127.147.192 - - [24/Oct/2013:12:10:36 +0200] "GET /elevate-live/av-orig-hls-low.m3u8 HTTP/1.1" 200 315 "<referrer>" "<UA>"
+## 91.119.202.141 - - [24/Oct/2013:12:10:36 +0200] "GET /elevate-live/av-orig-hls-low.m3u8 HTTP/1.1" 200 315 "-" "<UA>"
+## 91.119.202.141 - - [24/Oct/2013:12:10:36 +0200] "GET /elevate-live/av-orig-hls-low-10915.ts HTTP/1.1" 200 386340 "-" "<UA>"
+## 85.127.147.192 - - [24/Oct/2013:12:10:37 +0200] "GET /elevate-live/av-orig-hls-low.m3u8 HTTP/1.1" 200 315 "<referer>" "<UA>"
+## 85.127.147.192 - - [24/Oct/2013:12:10:37 +0200] "GET /elevate-live/av-orig-hls-low-10915.ts HTTP/1.1" 200 386340 "<referer>" "<UA>"
+ parts = [ r'(?P<client>\S+)', r'\S+', r'\S+', r'\[(?P<ts>.+)\]', r'"(?P<req>.+)"',
+ r'(?P<status>[0-9]+)', r'(?P<size>\S+)', r'"(?P<ref>.*)"', r'"(?P<ua>.*)"']
+ return re.compile(r'\s+'.join(parts)+r'\s*\Z')
+
+ def _parseLine(self, regex, line):
+ linedata = regex.match(line).groupdict()
+
+ if linedata["client"] == "-":
+ linedata["client"] = None
+
+ linedata["status"] = int(linedata["status"])
+
+ if linedata["size"] == "-":
+ linedata["size"] = 0
+ else:
+ linedata["size"] = int(linedata["size"])
+
+ if linedata["ref"] == "-":
+ linedata["ref"] = None
+
+ return linedata
+
def _sendLogData(self):
cnt = 0
- # TODO: create datasets using parser and call _sendDataset()
+ try:
+ regex = self._prepareLineRegex()
+ for line in self._fd:
+ linedata = self._parseLine(regex, line)
+ cnt += 1
+ print linedata
+
+ if cnt > 10:
+ break
+ # TODO: create datasets using parser and call _sendDataset()
+
+ except re.error as e:
+ print 'SFive: regex error: %s' % (e)
+
return cnt
def _sendDataset(self, timestamp, duration, client_count, bytes_sent):
@@ -181,6 +234,8 @@ if __name__ == '__main__':
help='tag to be added to the statistic data, can be invoked several times')
parser.add_argument('--logfile', '-l', dest='logfile', required=True,
help='path to the logfile')
+ parser.add_argument('--nameformat', '-F', dest='nameformat', required=True,
+ help='the format for filenames which are part of this stream, this may include python string expressions')
args = vars(parser.parse_args())
if not args['tags']:
args['tags'] = []