summaryrefslogtreecommitdiff
path: root/src/daq/accesslog
diff options
context:
space:
mode:
authorChristian Pointner <equinox@spreadspace.org>2014-10-12 22:25:57 +0200
committerChristian Pointner <equinox@spreadspace.org>2014-10-12 22:25:57 +0200
commit6b7f81934add1d58e46e18478fe0e163761a6564 (patch)
treebef3bdd6a9bdf6ff085d8b370b09c85eb2a7c511 /src/daq/accesslog
parentlog line regex works (diff)
parsing data from log lines works now
Diffstat (limited to 'src/daq/accesslog')
-rwxr-xr-xsrc/daq/accesslog/sfive-accesslog.py37
1 files changed, 26 insertions, 11 deletions
diff --git a/src/daq/accesslog/sfive-accesslog.py b/src/daq/accesslog/sfive-accesslog.py
index c8f4b40..94a0309 100755
--- a/src/daq/accesslog/sfive-accesslog.py
+++ b/src/daq/accesslog/sfive-accesslog.py
@@ -39,6 +39,7 @@ from time import sleep
import re
import simplejson as json
import datetime
+import dateutil.parser
_MAX_PACKET_SIZE = 8192 # in bytes
@@ -160,20 +161,34 @@ class AccessLog():
self._proto.sendDatagram('%s\n' % (json.dumps(initdata)));
def _prepareLineRegex(self):
-## 85.127.147.192 - - [24/Oct/2013:12:10:36 +0200] "GET /elevate-live/av-orig-hls-low.m3u8 HTTP/1.1" 200 315 "<referrer>" "<UA>"
-## 91.119.202.141 - - [24/Oct/2013:12:10:36 +0200] "GET /elevate-live/av-orig-hls-low.m3u8 HTTP/1.1" 200 315 "-" "<UA>"
-## 91.119.202.141 - - [24/Oct/2013:12:10:36 +0200] "GET /elevate-live/av-orig-hls-low-10915.ts HTTP/1.1" 200 386340 "-" "<UA>"
-## 85.127.147.192 - - [24/Oct/2013:12:10:37 +0200] "GET /elevate-live/av-orig-hls-low.m3u8 HTTP/1.1" 200 315 "<referer>" "<UA>"
-## 85.127.147.192 - - [24/Oct/2013:12:10:37 +0200] "GET /elevate-live/av-orig-hls-low-10915.ts HTTP/1.1" 200 386340 "<referer>" "<UA>"
parts = [ r'(?P<client>\S+)', r'\S+', r'\S+', r'\[(?P<ts>.+)\]', r'"(?P<req>.+)"',
r'(?P<status>[0-9]+)', r'(?P<size>\S+)', r'"(?P<ref>.*)"', r'"(?P<ua>.*)"']
return re.compile(r'\s+'.join(parts)+r'\s*\Z')
+ def _parseRequest(self, reqstr):
+ req = { 'cmd': None, 'url': None, 'proto': None }
+ try:
+ parts = reqstr.split()
+ req['cmd'] = parts[0] if parts[0] != '-' else None
+ req['url'] = parts[1]
+ req['proto'] = parts[2]
+ except IndexError:
+ pass
+
+ return req
+
+ def _parseDatetime(self, datetimestr):
+ try:
+ return dateutil.parser.parse(datetimestr[:11] + " " + datetimestr[12:], dayfirst=True)
+ except ValueError as e:
+ return None
+
def _parseLine(self, regex, line):
linedata = regex.match(line).groupdict()
- if linedata["client"] == "-":
- linedata["client"] = None
+ for part in ("client", "ref", "ua"):
+ if linedata[part] == "-":
+ linedata[part] = None
linedata["status"] = int(linedata["status"])
@@ -182,9 +197,8 @@ class AccessLog():
else:
linedata["size"] = int(linedata["size"])
- if linedata["ref"] == "-":
- linedata["ref"] = None
-
+ linedata['req'] = self._parseRequest(linedata['req'])
+ linedata['ts'] = self._parseDatetime(linedata['ts'])
return linedata
def _sendLogData(self):
@@ -195,8 +209,9 @@ class AccessLog():
linedata = self._parseLine(regex, line)
cnt += 1
print linedata
+ print
- if cnt > 10:
+ if cnt >= 10:
break
# TODO: create datasets using parser and call _sendDataset()