From e4bd9f8dff474ec37bbacccec1374a39a929fc00 Mon Sep 17 00:00:00 2001 From: Christian Pointner Date: Sat, 10 Jun 2017 18:00:44 +0200 Subject: improve documentation and more sanity checks --- doc/protocol.md | 67 +++++++++++++++++++++++++++++++++++++++++++++------------ 1 file changed, 53 insertions(+), 14 deletions(-) (limited to 'doc/protocol.md') diff --git a/doc/protocol.md b/doc/protocol.md index 9c4718d..fdd44f8 100644 --- a/doc/protocol.md +++ b/doc/protocol.md @@ -1,13 +1,51 @@ +Introduction +============ + +There are two types of interfaces: stateful and stateless. + +Stateful interfaces use persistent connections and send an init message after the +connection has been established. The values in this message are treated as defaults +which will be used if the corresponding value is missing in subsequent data-update +messages. In any case the values from data updates override values from init messages. + +Stateless interfaces will not use persistent connections but are datagram oriented, +therefore all values must be defined in data-update messages. + + +Structure of data and meaning of data fields: +--------------------------------------------- + +Sources of data updates are called streamers. A streamer is defined by the hostname of the +machine it runs on, a content specifier (room1-audio, room2-av, audio-english, ...), +a format specifier (flash, webm, hls, dash, ...) and a quality specifier (high, low, ...). + +Any data update has a start time and a duration. Those two values specify the timespan +during which a source gathered the data. Both these values are processed and stored +with millisecond precision. + +The actual data of the update consists of 3 aggregated values: client count, bytes sent and +bytes received. +Client count is the number of clients that are or have been connected for at least some +time within the timespan as specified by start time and duration. Bytes sent is the overall +number of bytes sent by the source to all the clients combined. 
Bytes received is the number +of bytes that the source received from its stream producer to be sent out to the clients. +In an ideal world those three values have the following relation: + + bytes-sent = bytes-received * client-count + +In addition to aggregated data, data updates may contain a list of all connected clients. +In order to be useful any client entry must contain the IP address of the client as well +as the bytes sent to it. Client list entries might also contain the port and other +information such as user agent strings or Geo IP information. + + + Messages ======== init ---- -All fields except "version" are optional. The values in this message are treated as -defaults which will be used if the corresponding value is missing in subsequent -update messages. - { "version": 2, "SourceHubUuid": "f7df89b4-171e-4b2f-a8a4-e58ac99e5dc5", @@ -19,18 +57,12 @@ update messages. "tags": [ "elevate", "2014", "discourse" ] } +All fields except "version" are optional. + data-update ----------- -All values which have been defined by the init message are optional. In any case the -values from data updates override values from init. Stateless interfaces will not use -init messages and therefore all values must be defined here. -"SourceHubUuid", "SourceHubUpdateId", "ForwardHubUuid", "ForwardHubUpdateId", -"user-agent", "bytes-received", "tags" and "clients" might be omitted and are treated -as an empty string, 0 or empty array respectively. -The start-time will be processesd and stored with millisecond precision. - { "version": 2, "SourceHubUuid": "f7df89b4-171e-4b2f-a8a4-e58ac99e5dc5", @@ -54,6 +86,13 @@ The start-time will be processesd and stored with millisecond precision. } } +All values which have been defined by the init message are optional. +"SourceHubUuid", "SourceHubUpdateId", "ForwardHubUuid", "ForwardHubUpdateId", "tags", +"data.bytes-received" and "data.clients" might be omitted and are treated as an empty +string, 0 or empty array respectively. 
If "clients" is present "port" and "user-agent" +fields of the entries might be empty or missing. Also in this case "data.client-count" +and "data.bytes-sent" might be 0 or omitted as those values will be calculated from +the contents of "data.clients" by the hub while ingesting the data. In addition to the user-agent string a client entry may have the following geo-info fields (all of which might be omitted): @@ -63,5 +102,5 @@ fields (all of which might be omitted): "region-code" ..... the 2-letter code for the region as defined by the MaxMind GeoIP2 database "city" ............ the name of the city - "latitude" ........ latitude in ° as float value - "longitude" ....... longitude in ° as float value + "latitude" ........ latitude in degrees as float value + "longitude" ....... longitude in degrees as float value -- cgit v1.2.3