From 6b7f81934add1d58e46e18478fe0e163761a6564 Mon Sep 17 00:00:00 2001 From: Christian Pointner Date: Sun, 12 Oct 2014 22:25:57 +0200 Subject: parsing data from log lines works now --- src/daq/accesslog/sfive-accesslog.py | 37 +++++++++++++++++++++++++----------- 1 file changed, 26 insertions(+), 11 deletions(-) (limited to 'src') diff --git a/src/daq/accesslog/sfive-accesslog.py b/src/daq/accesslog/sfive-accesslog.py index c8f4b40..94a0309 100755 --- a/src/daq/accesslog/sfive-accesslog.py +++ b/src/daq/accesslog/sfive-accesslog.py @@ -39,6 +39,7 @@ from time import sleep import re import simplejson as json import datetime +import dateutil.parser _MAX_PACKET_SIZE = 8192 # in bytes @@ -160,20 +161,34 @@ class AccessLog(): self._proto.sendDatagram('%s\n' % (json.dumps(initdata))); def _prepareLineRegex(self): -## 85.127.147.192 - - [24/Oct/2013:12:10:36 +0200] "GET /elevate-live/av-orig-hls-low.m3u8 HTTP/1.1" 200 315 "" "" -## 91.119.202.141 - - [24/Oct/2013:12:10:36 +0200] "GET /elevate-live/av-orig-hls-low.m3u8 HTTP/1.1" 200 315 "-" "" -## 91.119.202.141 - - [24/Oct/2013:12:10:36 +0200] "GET /elevate-live/av-orig-hls-low-10915.ts HTTP/1.1" 200 386340 "-" "" -## 85.127.147.192 - - [24/Oct/2013:12:10:37 +0200] "GET /elevate-live/av-orig-hls-low.m3u8 HTTP/1.1" 200 315 "" "" -## 85.127.147.192 - - [24/Oct/2013:12:10:37 +0200] "GET /elevate-live/av-orig-hls-low-10915.ts HTTP/1.1" 200 386340 "" "" parts = [ r'(?P<client>\S+)', r'\S+', r'\S+', r'\[(?P<ts>.+)\]', r'"(?P<req>.+)"', r'(?P<status>[0-9]+)', r'(?P<size>\S+)', r'"(?P<ref>.*)"', r'"(?P<ua>.*)"'] return re.compile(r'\s+'.join(parts)+r'\s*\Z') + def _parseRequest(self, reqstr): + req = { 'cmd': None, 'url': None, 'proto': None } + try: + parts = reqstr.split() + req['cmd'] = parts[0] if parts[0] != '-' else None + req['url'] = parts[1] + req['proto'] = parts[2] + except IndexError: + pass + + return req + + def _parseDatetime(self, datetimestr): + try: + return dateutil.parser.parse(datetimestr[:11] + " " + datetimestr[12:], dayfirst=True) + except 
ValueError as e: + return None + def _parseLine(self, regex, line): linedata = regex.match(line).groupdict() - if linedata["client"] == "-": - linedata["client"] = None + for part in ("client", "ref", "ua"): + if linedata[part] == "-": + linedata[part] = None linedata["status"] = int(linedata["status"]) @@ -182,9 +197,8 @@ class AccessLog(): else: linedata["size"] = int(linedata["size"]) - if linedata["ref"] == "-": - linedata["ref"] = None - + linedata['req'] = self._parseRequest(linedata['req']) + linedata['ts'] = self._parseDatetime(linedata['ts']) return linedata def _sendLogData(self): @@ -195,8 +209,9 @@ class AccessLog(): linedata = self._parseLine(regex, line) cnt += 1 print linedata + print - if cnt > 10: + if cnt >= 10: break # TODO: create datasets using parser and call _sendDataset() -- cgit v1.2.3