from IPython.display import HTML
from IPython.display import Markdown
import pandas as pd
import matplotlib.pyplot as plt
import matplotlib.colors as colors
import pylab
import numpy as np
from IPython.display import Image
import sys
import seaborn as sns
from matplotlib.colors import LogNorm
from cycler import cycler
import itertools
HTML('''<script>
code_show=true;
function code_toggle() {
if (code_show){
$('div.input').hide();
} else {
$('div.input').show();
}
code_show = !code_show
}
code_show=true;
$( document ).ready(code_toggle);
</script>
The raw code for this IPython notebook is by default hidden for easier reading.
<style>.container { width:95% !important; }</style>
To toggle on/off the raw code, click <a href="javascript:code_toggle()">here</a>.''')
# Set the date to previous business day
myd = (pd.Timestamp.today() - pd.tseries.offsets.BDay(1))
myDate = myd.strftime('%Y%m%d')
#override day here
#myDate ='20180914'
plotColors=[(0,0,153/255),(75/255,155/255,1),(210/255,210/255,210/255),
(180/255,225/255,1),(110/255,110/255,110/255),(5/255,99/255,193/255),
(110/255,110/255,110/255),(110/255,215/255,0),(215/255,235/255,1),(165/255,230/255,0)]
marketColors = { 'bg': { 'XEUR' : '#727272', 'XETR' : '#181813' },
'text': { 'XEUR' : '#333333', 'XETR' : '#0c519e' },
'marker': { 'XEUR' : plotColors[0], 'XETR' : plotColors[1] } }
# Define some global values
# closup low
t3a_t3n_lowmin = 0
t3a_t3n_lowmax = 2000
mint3a = 2500
maxt3a = 4000
mint3n = mint3a
maxt3n = 4000
binWidth = 2
binrange = np.arange(mint3a,maxt3a,binWidth)
windowsize = 25
# dictionaries we need later
partCondition=dict()
ts=dict()
t9_t3n=dict()
t9_t9d=dict()
t3a_t3n=dict()
t9d_t3a=dict()
t9d_t3n=dict()
net=dict()
t3a_t3n_estimate=dict()
t9d_t3a_estimate=dict()
t9_t9d_median=dict()
t9d_t3a_median=dict()
t3a_t3n_residual=dict()
t3nCorr=dict()
t9dCorr=dict()
t9d_t3n_corr= dict()
emptySeries = dict()
partitions = dict()
allpartitions = list()
topproducts = dict()
marketNames = { 'XEUR': 'Eurex', 'XETR' : 'Xetra'}
# a couple of useful functions
# convert missing values to zero
def timeConverter(num):
    try:
        return np.int64(num)
    except (ValueError, TypeError):
        return np.int64(0)
# percentile calculation
def percentile(n):
    def percentile_(x):
        return np.percentile(x, n)
    percentile_.__name__ = 'percentile_%s' % n
    return percentile_
df = pd.DataFrame()
dfall = pd.DataFrame()
def ReadFile(filename):
    return pd.read_csv(filename, sep=";",
                       converters={'ETICaptTime': timeConverter,
                                   'EOBICaptTime': timeConverter,
                                   'TransactTime': timeConverter,
                                   'AggressorTime': timeConverter,
                                   'RequestTime': timeConverter},
                       usecols=['MarketSegmentID', 'ExecID',
                                'PartitionID', 'AggressorSide', 'LastQty', 'LastPx',
                                'RequestTime',   # t3n
                                'AggressorTime', # t5
                                'TransactTime',  # t9
                                'EOBICaptTime',  # t9d
                                'ETICaptTime'    # t3a
                                ],
                       dtype={'MarketSegmentID': np.int32,
                              'ExecID': np.int64,
                              'PartitionID': np.int8,
                              'AggressorSide': np.int8,
                              'LastQty': np.int64},
                       ).sort_values(by=['ExecID'])
def CalculateTriggers(df):
    # search for the latest EOBI packet that precedes a given aggressive order by <minDelay> ns
    # assume this packet was the trigger for the aggressing order
    # minimum delay is cable length plus t9d to t9a, 2 x 181.5 m * 5 ns/m + 915 ns [delay t_9d => t_9a]
    minDelay = 2730
    t_3a = df['ETICaptTime'].values
    t_9d = df['EOBICaptTime'].values
    t_9 = df['TransactTime'].values
    t_3 = df['RequestTime'].values
    msID = df['MarketSegmentID'].values
    # t_9d of triggering EOBI packet
    df['trigger_t9d'] = t_9d[t_9d.searchsorted(t_3a - minDelay) - 1]
    # t_9 of triggering EOBI packet
    df['trigger_t9'] = t_9[t_9d.searchsorted(t_3 - minDelay) - 1]
    # t_3 of the request triggered by that EOBI packet
    df['trigger_t3'] = t_3[t_9d.searchsorted(t_3a - minDelay) - 1]
    # triggering market segment ID
    df['triggermsID'] = msID[t_9d.searchsorted(t_3a - minDelay) - 1]
    # time in datetime format for later plotting
    df['dt'] = pd.to_datetime(df['RequestTime'])
    # calculate the delay trigger to capture time (t_9d to t_3a)
    df['t9d_t3a'] = (df['ETICaptTime'] - df['trigger_t9d']) * (df['ETICaptTime'] > 0)
    # calculate the delay trigger to gateway time (t_9d to t_3n)
    df['t9d_t3n'] = (df['RequestTime'] - df['trigger_t9d']) * (df['RequestTime'] > 0)
    # calculate the delay trigger to gateway time (t_9 to t_3n)
    df['t9_t3n'] = (df['RequestTime'] - df['trigger_t9']) * (df['RequestTime'] > 0)
    # calculate the app to wire delay of triggering EOBI packet (t_9 to t_9d)
    df['t9_t9d'] = (df['trigger_t9d'] - df['trigger_t9']) * (df['RequestTime'] > 0)
def CalculateLatency(df):
    t_3 = df['RequestTime'].values
    meIn = df['AggressorTime'].values
    execID = df['ExecID'].values
    mdOut = df['TransactTime'].values
    df['t3n_t5'] = meIn - t_3
    df['t5_t7'] = execID - meIn
    df['t7_t9'] = mdOut - execID
    # EOBI latency app to wire (t9 to t9d)
    df['t9_t9d'] = df['EOBICaptTime'] - df['TransactTime']
    # account for offset between T7 server times and network capture times
    timeOffset = 0
    # latency of ETI request through network (t3a to t3n)
    df['t3a_t3n'] = df['RequestTime'] - df['ETICaptTime'] - timeOffset
    # latency of ETI request from capture to matcher (t3a to t7)
    df['t3a_t7'] = df['ExecID'] - df['ETICaptTime'] - timeOffset
fastCondition = (df['t3a_t3n'] < t3a_t3n_lowmax) & (df['t3a_t3n'] > t3a_t3n_lowmin)
allpartitions = list()
for market in ['XEUR', 'XETR']:
    partitions[market] = list(dfall[dfall['market'] == market].dropna().groupby('PartitionID').size().index)
    allpartitions = allpartitions + partitions[market]
    t9_t9d_median[market] = df[df['market'] == market].dropna()['t9_t9d'].median()
    t3nCorr[market] = df[(df['market'] == market) & fastCondition].dropna()['t3a_t3n'].median()
    t9dCorr[market] = df[df['market'] == market].dropna()['t9_t9d'].median()
    for partition in partitions[market]:
        emptySeries[partition] = pd.Series()
        partCondition[partition] = (df['PartitionID'] == partition) & fastCondition
        condition = partCondition[partition]
        ts[partition] = df['RequestTime'][condition].dropna()
        t9_t3n[partition] = df['t9_t3n'][condition].dropna()
        t9_t9d[partition] = df['t9_t9d'][condition].dropna()
        t3a_t3n[partition] = df['t3a_t3n'][condition].dropna()
        t9d_t3a[partition] = df['t9d_t3a'][condition].dropna()
        t9d_t3n[partition] = df['t9d_t3n'][condition].dropna()
        net[partition] = df['t3a_t3n'][condition].dropna()
        t3a_t3n_estimate[partition] = df['t3a_t3n'][condition].dropna().ffill().rolling(window=windowsize, center=True).median().ffill().bfill()
        t9d_t3a_estimate[partition] = t9d_t3n[partition] - t3a_t3n_estimate[partition]
        t9d_t3a_median[partition] = t9_t3n[partition] - t9dCorr[market] - t3nCorr[market]
        t3a_t3n_residual[partition] = net[partition] - t3a_t3n_estimate[partition]
        t9d_t3n_corr[partition] = t9d_t3n[partition] - t3nCorr[market]
for partition in allpartitions:
    a = dfall[dfall['PartitionID'] == partition].dropna().groupby(['sprod']).size()  # ['LastQty'].sum()
    topproducts[partition] = list(a.sort_values(ascending=False).head(5).index)
def TranslateProds(df):
    prodFile = '/local/neusseb/workspace/mml-tools/hpt/products.csv'
    products = pd.read_csv(prodFile, sep=";")
    cgFile = '/local/neusseb/workspace/mml-tools/hpt/capgroups.csv'
    capgroups = pd.read_csv(cgFile, sep=";")
    partFile = '/local/neusseb/workspace/mml-tools/hpt/partitions.csv'
    ps = pd.read_csv(partFile, sep=";")
    mFile = '/local/neusseb/workspace/mml-tools/hpt/markets.csv'
    ms = pd.read_csv(mFile, sep=";")
    prodMap = dict(zip(products.ID, products.Name))
    prodToCap = dict(zip(products.ID, products.ParentID))
    capToPart = dict(zip(capgroups.ID, capgroups.ParentID))
    part2Market = dict(zip(ps.ID, ps.ParentID))
    market2Name = dict(zip(ms.ID, ms.Name))
    df['market'] = df.MarketSegmentID.map(prodToCap).map(capToPart).map(part2Market).map(market2Name)
    df['prod'] = df.MarketSegmentID.map(prodToCap).map(capToPart).map(part2Market).map(market2Name) + '-' + df.MarketSegmentID.map(prodMap)
    df['sprod'] = df.MarketSegmentID.map(prodMap)
    df['triggerProd'] = df.triggermsID.map(prodToCap).map(capToPart).map(part2Market).map(market2Name) + '-' + df.triggermsID.map(prodMap)
def DrawPartitions(market, type, minTime, maxTime, lowlimit, highlimit, plotEstimate, showMedian=True):
    # subplots
    cols = 2
    rows = 5
    pylab.rcParams['figure.figsize'] = (20, 12)
    f, axarr = plt.subplots(rows, cols, sharex='col', sharey='row')
    miny = np.round(df[type][(df[type] < highlimit) & (df['market'] == market)].quantile(0.01), -2) - 100
    maxy = np.round(df[type][(df[type] < highlimit) & (df['market'] == market)].quantile(0.99), -2) + 100
    timeCondition = (df['RequestTime'] >= minTime) & (df['RequestTime'] <= maxTime)
    for partition in partitions[market]:
        condition = (df['PartitionID'] == partition) & timeCondition
        #cr = plotColors[partition - min(partitions[market])]
        cr = marketColors['marker'][market]
        sp = axarr[(partition - min(partitions[market])) // cols, (partition + min(partitions[market])) % cols]
        sp.grid(color='grey', linestyle='dotted', linewidth=1, axis='y')
        sp.set_facecolor('xkcd:white')
        sp.set_ylim([miny, maxy])
        ts = df['dt'].where(condition).dropna()
        net = df[type].where(condition).dropna()
        if showMedian:
            sp.set_title('Partition %s (median latency %d ns)' % (partition, net.median()), fontsize=18, color=marketColors['text'][market])
        else:
            sp.set_title('Partition %s' % (partition), fontsize=18, color=marketColors['text'][market])
        markerSize = 3
        if plotEstimate:
            windowsize = 15
            sp.plot_date(ts, net, color=cr, tz=cet, marker='.', markersize=markerSize)
            estimate = df[type][condition].dropna().where((df[type] < maxy) & (df[type] > miny)).ffill().rolling(window=windowsize, center=True).median().bfill().ffill()
            sp.plot_date(ts, estimate, color='black', tz=cet, lw=1, ls='-', markersize=0)
        else:
            sp.plot_date(ts, net, color=cr, tz=cet, marker='.', markersize=markerSize)
    dummy = plt.suptitle('%s\n%s for executions' % (marketNames[market], type), fontsize=26, color=marketColors['text'][market])
    plt.subplots_adjust(wspace=0.2, hspace=0.5)
    plt.show()
def DrawPartitionSeries(market, series, minTime, maxTime, lowlimit, highlimit):
    # subplots
    cols = 2
    rows = 5
    pylab.rcParams['figure.figsize'] = (20, 12)
    f, axarr = plt.subplots(rows, cols, sharex='col', sharey='row')
    for partition in partitions[market]:
        timeCondition = (ts[partition] >= minTime) & (ts[partition] <= maxTime)
        condition = timeCondition
        #cr = plotColors[partition - min(partitions[market])]
        cr = marketColors['marker'][market]
        sp = axarr[(partition - min(partitions[market])) // cols, (partition + min(partitions[market])) % cols]
        sp.grid(color='grey', linestyle='dotted', linewidth=1, axis='y')
        sp.set_facecolor('xkcd:white')
        sp.set_ylim([lowlimit, highlimit])
        myTs = pd.to_datetime(ts[partition].where(condition).dropna())
        mySeries = series[partition][condition].dropna()
        #sp.set_title('Partition %s (median latency %d ns)' % (partition, mySeries.median()), fontsize=18, color=marketColors['text'][market])
        markerSize = 3
        sp.plot_date(myTs, mySeries, color=cr, tz=cet, marker='.', markersize=markerSize)
    dummy = plt.suptitle('%s' % (marketNames[market]), fontsize=26, color=marketColors['text'][market])
    plt.subplots_adjust(wspace=0.2, hspace=0.5)
    plt.show()
def DrawPartitionHist(series, series2, binRange, market, title, printQuartiles):
    # subplots
    cols = 2
    rows = 5
    pylab.rcParams['figure.figsize'] = (20, 12)
    f, axarr = plt.subplots(rows, cols, sharex='col', sharey='row')
    for partition in partitions[market]:
        sp = axarr[(partition - min(partitions[market])) // cols, (partition + min(partitions[market])) % cols]
        #cr = plotColors[partition - min(partitions[market])]
        cr = marketColors['marker'][market]
        sp.grid(color='grey', linestyle='dotted', linewidth=1, axis='x')
        sp.set_facecolor('xkcd:white')
        if printQuartiles:
            sp.set_title('Partition %s (quartiles %d, %d, %d)' % (partition, series[partition].quantile(0.25), series[partition].quantile(0.5), series[partition].quantile(0.75)), fontsize=18)
        else:
            sp.set_title('Partition %s' % (partition), fontsize=18)
        sp.hist(series[partition], bins=binRange, color=cr, alpha=1)
        if series2[partition].size > 0:
            sp.hist(series2[partition], bins=binRange, color='grey', alpha=0.5)
    dummy = plt.suptitle(title, fontsize=26, color=marketColors['text'][market])
    plt.subplots_adjust(wspace=0.2, hspace=0.5)
    plt.show()
def DrawPartitionHistSingle(givenPartition, series, series2, binMin, binMax, bucket, market, title, seriesName, seriesName2, printQuartiles):
    # subplots
    cols = 1
    rows = 1
    yt = [0, 25, 50, 75]
    ytick = [int(i / bucket) for i in yt]
    binRange = np.arange(binMin, binMax, bucket)
    pylab.rcParams['figure.figsize'] = (30, 15)
    f, axarr = plt.subplots(rows, cols, sharex='col', sharey='row')
    for partition in [givenPartition]:
        sp = axarr
        #cr = plotColors[partition - min(partitions[market])]
        cr = marketColors['marker'][market]
        sp.grid(color='grey', linestyle='dotted', linewidth=1, axis='x')
        sp.set_facecolor('xkcd:white')
        sp.hist(series[partition], bins=binRange, color='blue', alpha=1)
        if series2[partition].size > 0:
            sp.hist(series2[partition], bins=binRange, color='grey', alpha=0.5)
        sp.set_xticks(range(binMin, binMax, 200))
        sp.set_xticklabels(range(binMin, binMax, 200), size=30, weight='bold')
        # set tick positions before labels so labels align with the ticks
        sp.set_yticks(yt)
        sp.set_yticklabels(ytick, size=20, weight='bold')
    plt.savefig('%s_%s_%s.png' % (givenPartition, seriesName, seriesName2), dpi=None, facecolor='w', edgecolor='w',
                orientation='portrait', transparent=False, bbox_inches=None, pad_inches=0.1)
    plt.show()
def DrawScatter(series1, series2, xlabel, ylabel, tmin, tmax, title):
    pylab.rcParams['figure.figsize'] = (30, 30)
    scatter_axes = plt.subplot2grid((5, 5), (1, 0), rowspan=4, colspan=4)
    x_hist_axes = plt.subplot2grid((5, 5), (0, 0), colspan=4, sharex=scatter_axes)
    y_hist_axes = plt.subplot2grid((5, 5), (1, 4), rowspan=4, sharey=scatter_axes)
    plt.suptitle(title, fontsize=26)
    scatter_axes.set_xlim([tmin, tmax])
    scatter_axes.set_ylim([tmin, tmax])
    y_hist_axes.set_ylim([tmin, tmax])
    x_hist_axes.set_xlim([tmin, tmax])
    binWidth = 1
    binrangex = np.arange(tmin, tmax, binWidth)
    binrangey = binrangex
    scatter_axes.set_xlabel(xlabel)
    scatter_axes.set_ylabel(ylabel)
    maxHist = np.round(series1.value_counts().max() * binWidth, -2) + 50
    x_hist_axes.set_ylim([0, maxHist])
    y_hist_axes.set_xlim([0, maxHist])
    plt.setp(x_hist_axes.get_xticklabels(), visible=False)
    plt.setp(y_hist_axes.get_yticklabels(), visible=False)
    scatter_axes.scatter(series1, series2, marker='.', s=2)
    scatter_axes.grid(color='grey', linestyle='dotted', linewidth=1)
    # annotate local maxima of the x histogram with their latency value
    hist, edges = np.histogram(series1, bins=binrangex)
    maxima = list(map(lambda x: (edges[x], hist[x]), argrelmax(hist, order=15)))
    for a in list(zip(maxima[0][0], maxima[0][1])):
        if a[1] > 1E-04:
            x_hist_axes.annotate('%s' % a[0], xy=a, xycoords='data', textcoords='offset points', xytext=(-15, 15))
    x_hist_axes.hist(series1, bins=binrangex, alpha=0.75)
    x_hist_axes.grid(color='grey', linestyle='dotted', linewidth=1, axis='x')
    # annotate local maxima of the y histogram; tuples are (count, latency) here
    hist, edges = np.histogram(series2, bins=binrangey)
    maxima = list(map(lambda x: (edges[x], hist[x]), argrelmax(hist, order=15)))
    for a in list(zip(maxima[0][1], maxima[0][0])):
        if a[0] > 1E-04:
            y_hist_axes.annotate('%s' % a[1], xy=a, xycoords='data', textcoords='offset points', xytext=(5, 0))
    y_hist_axes.hist(series2, orientation='horizontal', bins=binrangey, alpha=0.75)
    y_hist_axes.grid(color='grey', linestyle='dotted', linewidth=1, axis='y')
    scatter_axes.set_xlabel(xlabel, fontsize=16)
    scatter_axes.set_ylabel(ylabel, fontsize=16)
    x_hist_axes.set_ylabel('#observations', fontsize=16)
    y_hist_axes.set_xlabel('#observations', fontsize=16)
    plt.subplots_adjust(wspace=0.1, hspace=0.1)
    plt.show()
Markdown('''# Analysis of HPT file (Date %s)
The following notebook is based on the High Precision Timestamp file of **%s**, also see the [Deutsche Börse membersection](https://member.deutsche-boerse.com/).
It demonstrates what kind of information can be extracted from the file.
It does NOT contain any information other than what is contained in the HPT file.
If you want to map your own dataset, we suggest you take the MarketSegmentID + ExecID of your executions (or those that you missed) and map them to the corresponding dataset contained in the HPT file.
Note that not all executions have a corresponding network capture timestamp, as we only capture order entry traffic originating from the 10 GBit CoLo 2.0 network.
The time synchronization of the capturing devices is implemented using a White Rabbit network in combination with PPS breakouts. This leads to very stable, almost drift-free network level timestamps with an error below +/- 5 ns.
The timestamps taken by T7 applications and the gateway network card are synchronized via a PTP network, which leads to higher time drifts in the order of up to +/- 100 ns (see details below).
The HPT file contains all timestamps from the execution summary, i.e.
* RequestTime (t_3n),
* AggressorTime (t_5),
* ExecID (t_7) and
* TransactTime (t_9)
and additionally
* ETICaptTime (t_3a) of the aggressing order (if available) and
* EOBICaptTime (t_9d)
The following figure shows where the timestamps are taken:''' % (myd.strftime('%Y-%m-%d'), myd.strftime('%A %d %B %Y')))
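As an illustration, a minimal sketch of such a mapping (the file name my_executions.csv is hypothetical; dfall is the HPT dataset read further below):
# your own execution records; only MarketSegmentID and ExecID must match the HPT file
myFills = pd.read_csv('my_executions.csv', sep=';')
# attach the high precision timestamps via an inner join on the execution identity
myFillsHpt = myFills.merge(dfall, on=['MarketSegmentID', 'ExecID'], how='inner')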
# @hidden_cell
Image(filename="t7_network.png")
We have measured the following median network delays:
$t_{9d}$ to $t_{9a}$: 914 ns
(1) $t_{9a} - t_{9d} = 914\,ns = $ midpoint tap($t_{9d}$ to $t_{9a}$)$ + A_m - D_m$
The minimum time between trigger market data $t_{9d}$ and $t_{3a}$ then becomes
(2) $(t_{3a} - t_{9d})_{min} = 2\cdot 911\,ns + $ midpoint tap($t_{9d}$ to $t_{9a}$)$ - D_m + A$
and with (1)
(3) $(t_{3a} - t_{9d})_{min} = 2\cdot 911\,ns + 914\,ns - A_m + A = 2736\,ns + (A - A_m)$
where $A$, $A_m$ and $D_m$ are the mid tap to capture time delays at $t_{3a}$, $t_{9a}$ and $t_{9d}$ respectively.
Cable lengths between the taps and the Metamako timestamping devices are 2 m at $t_{9a}$, 3 m at $t_{9d}$, $t_{9a'}$, $t_{3d}$ and $t_{4d}$, and 5 m at $t_{3a}$, $t_{3a'}$, $t_{4a}$ and $t_{4a'}$, so
(4) $A - A_m = 15\,ns$
and
(5) $(t_{3a} - t_{9d})_{min} = 2751\,ns$
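As a quick arithmetic check of (3) to (5), in plain Python with the constants from the text above (5 ns/m is the propagation figure also used in the code comment in CalculateTriggers):
one_way = 911             # one-way tap to tap propagation delay in ns, from (2)
t9d_t9a = 914             # measured median t_9d to t_9a delay in ns, from (1)
a_minus_am = (5 - 2) * 5  # capture cable difference in ns, 5 ns/m, from (4)
print(2 * one_way + t9d_t9a)               # 2736 ns, equation (3)
print(2 * one_way + t9d_t9a + a_minus_am)  # 2751 ns, equation (5)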
# read in hpt file
fileName = 'hpt_%s_complete.csv'%(myDate)
dfall = ReadFile(fileName)
CalculateTriggers(dfall)
TranslateProds(dfall)
CalculateLatency(dfall)
# filter out rows that don't have an ETICaptTime or RequestTime set
df=dfall[(dfall.RequestTime != 0) & (dfall.ETICaptTime != 0)]
# min and max of the day
minTime = min(df['RequestTime'])
maxTime = max(df['RequestTime'])
import pytz
cet=pytz.timezone('Europe/Berlin')
pylab.rcParams['figure.figsize'] = (20, 10)
tpl = dict()
hpl = dict()
binr = dict()
yr = dict()
# set y axis ranges
yr[0] = [df['t3a_t3n'].min(),df['t3a_t3n'].max()]
maxAbs = 150000
if yr[0][1] > maxAbs:
    yr[0][1] = maxAbs
if yr[0][0] < -1 * maxAbs:
    yr[0][0] = -1 * maxAbs
yr[1] = [40000, yr[0][1] if yr[0][1] <= 80000 else 80000]
yr[2] = [500, 2500]
for i in [0, 1, 2]:
    if i == 0:
        tpl[i] = plt.subplot2grid((3, 5), (i, 0), rowspan=1, colspan=4)
    else:
        tpl[i] = plt.subplot2grid((3, 5), (i, 0), rowspan=1, colspan=4, sharex=tpl[0])
    hpl[i] = plt.subplot2grid((3, 5), (i, 4), colspan=1, sharey=tpl[i])
    hpl[i].set_xscale("log")
    tpl[i].set_ylim(yr[i][0], yr[i][1])
    binr[i] = np.arange(yr[i][0], yr[i][1], (yr[i][1] - yr[i][0]) / 200)
tpl[0].plot_date(df['dt'], df['t3a_t3n'], tz=cet, marker='.', markersize=1, mec='black')
hpl[0].hist(df['t3a_t3n'], orientation='horizontal', bins=binr[0], alpha=0.75, color='black')
for market in ['XETR', 'XEUR']:
    series = df['t3a_t3n'].where(df['market'] == market).dropna()
    times = df['dt'].where(df['market'] == market).dropna()
    for i in [1, 2]:
        tpl[i].plot_date(times, series, tz=cet, label=marketNames[market], marker='.', ms=1, mec=marketColors['marker'][market], alpha=0.75)
        hpl[i].hist(series, orientation='horizontal', bins=binr[i], alpha=0.75, label=marketNames[market], color=marketColors['marker'][market])
for i in [1, 2]:
    hpl[i].legend(markerscale=10)
    tpl[i].legend(markerscale=10)
for i in [0, 1, 2]:
    plt.setp(hpl[i].get_yticklabels(), visible=False)
plt.setp(tpl[0].get_xticklabels(), visible=False)
d = plt.setp(tpl[1].get_xticklabels(), visible=False)
The plot below shows the network transport time t_3a to t_3n over the course of the day (all times are UTC). We can distinguish two latency domains, one above 40 µs and the other below 4 µs.
The upper band consists of requests that are not optimized for latency. These are either routed to low frequency gateways or to high frequency gateways on the other side (e.g. a request sent via a cross connect on side A targeting a gateway on side B).
There are also a few requests that have a negative network latency. This is an artefact of the way the Solarflare wire order delivery API delivers timestamps. If more than one request arrives on the same session within a very short time frame, the packets may be handed over to the application in one callback. This callback hands over the timestamp of the first message, which may lead to a negative network latency.
plt.show()
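A minimal sketch of excluding those artefacts explicitly (the strictly positive cut-off is an assumption; the analysis below instead restricts to the fastCondition window defined at the top of the notebook):
# drop requests whose capture timestamp was inherited from an earlier packet
# in the same callback and therefore shows a non-positive t3a -> t3n latency
positiveLatency = df[df['t3a_t3n'] > 0]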
In the following we concentrate on the requests sent via an optimal network connection, which usually have a latency below 4 µs.
The network latency of these requests is influenced by three factors:
* time drift between the clock domains involved,
* jitter in timestamp taking and in the network, and
* queuing in the network.
In order to show the time synch quality of each gateway individually, the below plots are split by partition.
DrawPartitions('XEUR','t3a_t3n',minTime, maxTime, 0, t3a_t3n_lowmax,False)
DrawPartitions('XETR','t3a_t3n',minTime, maxTime, 0, t3a_t3n_lowmax,False)
The measured latency of market data between the market data publisher (t_9) and receiver (t_11) can be broken down as follows.
The application to wire latency traverses time domains between the market data disseminator (software timestamp) and the network capture devices (synchronized via White Rabbit and PPS). The following diagrams show the latency between the SendingTime (t_9) and the distribution layer capture timestamp.
DrawPartitions('XEUR','t9_t9d',minTime, maxTime, 0,15000, False)
DrawPartitionHist(t9_t9d, emptySeries, np.arange(0,4000,10),'XEUR','Market Data Application to Wire latency', True)
from scipy.signal import argrelextrema, argrelmax
histmin = 0
histmax = 4000
binwidth = 10
for partition in [1]:
    hist, edges = np.histogram(t9_t9d[partition], bins=np.arange(histmin, histmax, binwidth), density=True)
    maxima = list(map(lambda x: (edges[x], hist[x]), argrelmax(hist, order=10)))
    plt.bar(edges[:-1], hist, width=binwidth)
    # annotate the local maxima of the density histogram with their latency value
    for a in list(zip(maxima[0][0], maxima[0][1])):
        if a[1] > 1E-04:
            plt.annotate('%s' % a[0], xy=a, xycoords='data', textcoords='offset points', xytext=(-15, 5))
    plt.show()
    #print('Partition %2d, %d' % (partition, t9_t9d[partition].mode()[0]))
A two hour close-up shows that the different gateways exhibit different levels of time drift. Some gateways exhibit a regular, wave-like drift of varying magnitude, while others exhibit an irregular drift.
We use a rolling median to approximate the time drift at any given point in time. A static window size was chosen for simplicity; its size was chosen such that the confidence interval of the first and third quartile is minimal.
Queuing effects lead to varying network delays (the spots above the 'base' delay).
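For reference, this is the drift estimate used in the plots below, in isolation (partition 1 as an example; net and windowsize are defined at the top of the notebook):
# centered rolling median over the fast t3a -> t3n samples approximates the slow drift;
# ffill/bfill close the gaps the centered window leaves at both ends
drift = net[1].ffill().rolling(window=windowsize, center=True).median().ffill().bfill()
residual = net[1] - drift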
DrawPartitions('XEUR','t3a_t3n',(minTime + maxTime )/ 2 - 3600E09, (minTime + maxTime) / 2 + 3600E09, 0, t3a_t3n_lowmax,True)
DrawPartitions('XETR','t3a_t3n',(minTime + maxTime )/ 2 - 3600E09, (minTime + maxTime) / 2 + 3600E09, 0, t3a_t3n_lowmax,True)
With the approximation of the time drift by a rolling median, there is some residual jitter left.
The below charts show the residuals when subtracting the rolling median.
There is a band around 0; these are requests that traverse the network without queuing and make up the vast majority.
Requests with a residual t_3a to t_3n latency of more than 50 ns are in most cases subject to network queuing.
DrawPartitionSeries('XEUR',t3a_t3n_residual,minTime, maxTime, -200,200 )
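One way to quantify the queued share per partition, as a sketch using the 50 ns threshold from the observation above:
# fraction of requests per partition whose residual exceeds the queuing threshold
for partition in partitions['XEUR']:
    queuedShare = (t3a_t3n_residual[partition] > 50).mean()
    print('Partition %2d: %5.1f%% queued' % (partition, 100 * queuedShare))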
The deviation from the rolling median for those requests that are not queued in the network is due to jitter in timestamp taking and in the network.
The chart below shows this jitter to be of the magnitude +/- 8 ns (confidence interval first to third quartile).
DrawPartitionHist(t3a_t3n_residual, emptySeries, np.arange(-50,50,1),'XEUR','Network delay residuals', True)
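The quartiles printed in the panel titles can also be computed directly, e.g. for partition 1:
# first and third quartile of the drift-corrected residuals, in ns
q1 = t3a_t3n_residual[1].quantile(0.25)
q3 = t3a_t3n_residual[1].quantile(0.75)
print('residual jitter: %d ns to %d ns' % (q1, q3))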
Besides the time drift and jitter, queuing occurs in the network when two requests are aimed at the same target and the request arriving earlier blocks the output port of the switch.
There are two points in the network where this queuing may occur.
The below analysis will touch upon these aspects.
Assuming a preceding execution 'triggers' the next trade, we can calculate the reaction time between the receive time of an executing order and the preceding execution.
The expected order entry time of a request triggered by an execution can be calculated as \begin{equation} t_{3a,n} = t_{9d,n-1} + reactionTime \end{equation}
and thus \begin{equation} reactionTime = t_{3a,n} - t_{9d,n-1} \end{equation} where \begin{equation} t_{3a,n} > t_{9d,n-1} + minNetworkDelay \end{equation}
The 'minimum network delay' has the following components (cf. the derivation above):
* twice the one-way propagation delay between the exchange taps and the participant ($2\cdot 911$ ns),
* the measured median delay $t_{9d}$ to $t_{9a}$ (914 ns), and
* the capture cable length correction ($A - A_m$ = 15 ns).
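The trigger matching itself is what CalculateTriggers above implements; condensed to its core (arrays as in that function, t_9d monotonic because the frame is sorted by ExecID):
t_3a = df['ETICaptTime'].values
t_9d = df['EOBICaptTime'].values
minDelay = 2730  # minimum network delay in ns, see above
# index of the last EOBI packet that precedes each order entry by at least minDelay
idx = t_9d.searchsorted(t_3a - minDelay) - 1
reaction = t_3a - t_9d[idx]  # the t9d_t3a column computed above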
Below we show how the HPT file can help to compare reaction times at the nanosecond level, as the timestamps t_3a and t_9d are subject to less jitter and exclude any network queuing of the incoming requests. We start by looking at reaction times based only on timestamps contained in the EOBI protocol.
The EOBI protocol contains the gateway order entry time t_3n of the aggressing order (RequestTime) and the application level send time (TransactTime) of the EOBI packet for every execution.
The below plot shows the distribution of the reaction time based on t_3n and t_9, corrected by the median latencies t_3a to t_3n and t_9 to t_9d for later comparison.
DrawPartitions('XEUR','t9_t3n', minTime, maxTime,0, 2000000, False, False)
DrawPartitionHistSingle(3,t9d_t3n_corr, emptySeries,2600,4000,5, market, 't9d_t3n_corr','t9d_t3n_corr','5', False)
DrawPartitionHistSingle(3,t9d_t3a_estimate, t9d_t3n_corr,2600,4001,1, market, 't9d_t3n (time synch corrected)','t9d_t3a_estimate','t9d_t3n_corr', False)
DrawPartitionHistSingle(3,t9d_t3a, t9d_t3a_estimate,2600,4001,1, market, 't9d_t3n (time synch corrected)','t9d_t3a','t9d_t3a_estimate', False)
market='XEUR'
DrawPartitionHist(t9d_t3a_median, emptySeries,np.arange(0,25000,100), market, 'Reaction time t_9 to t_3 (%s)\nCorrected by median t9=>t9d (%d ns) and t3a=>t3n (%d ns)'%(marketNames[market],t9dCorr[market],t3nCorr[market]), False)
market='XEUR'
DrawPartitionHist(t9d_t3a_median, emptySeries,np.arange(2000,4000,10), market, 'Reaction time t_9 to t_3 closeup (%s)\nCorrected by median t9=>t9d (%d ns) and t3a=>t3n (%d ns)'%(marketNames[market],t9dCorr[market],t3nCorr[market]), False)
# market='XETR'
# DrawPartitionHist(t9d_t3a_median, emptySeries, binrange, market, 'Reaction time t_9 to t_3 for %s\nCorrected by median t9=>t9d (%d ns) and t3a=>t3n (%d ns)'%(market,t9dCorr[market],t3nCorr[market]), False)
Using the network level timestamp t_9d instead of t_9 eliminates the jitter introduced by the network card and the time drift between t_9 and t_9d.
The below plot shows the distribution of the reaction time based on t_3n and t_9d, corrected by the median latency t_3a to t_3n.
As expected, the distribution is a bit narrower than the original one.
market='XEUR'
DrawPartitionHist(t9d_t3n_corr, t9d_t3a_median, np.arange(2000,4000,10), market, 'Reaction time t_9d to t_3n (%s)\nCorrected by median t3a=>t3n (%d ns)'%(market,t3nCorr[market]), False)
When taking into account the dynamic part of the time drift using a rolling window median, the latency histogram reveals much more structure.
market='XEUR'
DrawPartitionHist(t9d_t3a_estimate, t9d_t3n_corr, np.arange(2500,4000,10), market, 'Reaction time t_9d to t_3n (%s)\nCorrected by rolling window median t3a=>t3n '%(market), False)
Using ETICaptTime (t_3a) from the HPT file, the reaction time histogram reveals the real reaction time, uninfluenced by time drift and, more importantly, by network queuing and jitter.
The distribution is not only sharper, it also reveals reaction times not contained in the distributions based on t_3n.
market='XEUR'
DrawPartitionHist(t9d_t3a, t9d_t3a_estimate, np.arange(2500,3500,2), market, '%s\nSpectrum of measured t_3a - t_9d'%(market), False)
for partition in [1, 2, 3, 4, 50, 58]:
    DrawScatter(t9d_t3a[partition],
                t9d_t3a_estimate[partition],
                "Reaction time t9d to t3a (ns)",
                "Estimated reaction time based on t3n and median rolling delay t3a to t3n (ns)",
                2750, 3200,
                '\nPartition %d\nreaction time (estimated) vs reaction time (real)' % (partition))
Below we compare the T7 application latencies for aggressing orders with fast trigger reaction times (< 5 µs) versus those with higher reaction times. We plot the paths gateway to matcher, matcher pre-execution, and matcher execution to market data out. The higher T7 latencies for the faster reaction times indicate higher load at those times, i.e. more competition.
def AnnotateHist(subPlot, series, binrange):
    hist, edges = np.histogram(series, bins=binrange)
    maxima = list(map(lambda x: (edges[x], hist[x]), argrelmax(hist, order=15)))
    maxHist = max(maxima[0][1])
    for a in list(zip(maxima[0][0], maxima[0][1])):
        if a[1] > maxHist / 2:
            subPlot.annotate('%s' % (a[0]), xy=a, xycoords='data', textcoords='offset points', xytext=(-15, 5))
marketSegmentId = 589
partition = 1
pylab.rcParams['figure.figsize'] = (20, 8)
lowmax = 5000
condition= ( df['PartitionID'] == partition ) & ( df['MarketSegmentID'] == marketSegmentId ) & ( df['t9d_t3a'] < lowmax )
binrangeapp = np.arange(0,100000,100)
plt.title('Market Segment %d (Partition %d): T7 Application latency for fast reactions' %(marketSegmentId, partition))
plt.xlabel("Latency (ns)")
plt.ylabel("#observations")
plt.xlim([0,100000])
n,a,b = plt.hist(df['t3n_t5'][condition].dropna(), bins=binrangeapp,color=plotColors[0], alpha=0.75, label='t_3n to t_5')
n,a,b = plt.hist(df['t5_t7'][condition].dropna(), bins=binrangeapp,color=plotColors[2], alpha=0.75, label='t_5 to t_7')
n,a,b = plt.hist(df['t7_t9'][condition].dropna(), bins=binrangeapp,color=plotColors[1], alpha=0.75, label='t_7 to t_9')
AnnotateHist(plt,df['t3n_t5'][condition].dropna(),binrangeapp)
AnnotateHist(plt,df['t5_t7'][condition].dropna(),binrangeapp)
AnnotateHist(plt,df['t7_t9'][condition].dropna(),binrangeapp)
plt.legend()
plt.show()
condition=(df['PartitionID'] == partition) & (df['MarketSegmentID'] == marketSegmentId) & (df['t9d_t3a'] >= lowmax)
pylab.rcParams['figure.figsize'] = (20, 8)
ts = df['RequestTime'][condition].dropna()
meIn = df['AggressorTime'][condition].dropna()
execID = df['ExecID'][condition].dropna()
mdOut = df['TransactTime'][condition].dropna()
#
plt.title('Market Segment %d (Partition %d): T7 Application latency for slower reactions' %(marketSegmentId, partition))
plt.xlabel("Latency (ns)")
plt.ylabel("#observations")
plt.xlim([0,100000])
n,a,b = plt.hist(meIn- ts, bins=binrangeapp,color=plotColors[0], alpha=0.75, label='t_3n to t_5')
n,a,b = plt.hist(execID- meIn, bins=binrangeapp,color=plotColors[2], alpha=0.75, label='t_5 to t_7')
n,a,b = plt.hist(mdOut- execID, bins=binrangeapp,color=plotColors[1], alpha=0.75, label='t_7 to t_9')
AnnotateHist(plt,meIn-ts,binrangeapp)
AnnotateHist(plt,execID- meIn,binrangeapp)
AnnotateHist(plt,mdOut- execID,binrangeapp)
plt.legend()
plt.show()
The below gives the correlation matrix between different products, i.e. trades of the products in the rows trigger prompt executions in the products in the columns.
condition = (df['t9d_t3a'] < 4000) & (df['t9d_t3a'] > 2750)
t=pd.DataFrame(df[condition].groupby(by=['triggerProd','prod']).size())
%matplotlib inline
fig, ax = plt.subplots(figsize=(50,20))
a= t.unstack(fill_value=0)[0].sort_index(axis=1).sort_index(axis=0)
triggerMax = 0
threshold = 200
for s in a.columns:
    if a[s].max() > triggerMax:
        triggerMax = a[s].max()
    if a[s].sum() < threshold:
        a = a.drop(s, axis=1)
for s in a.index:
    if a.loc[s].sum() < threshold and not (s in a.columns):
        a = a.drop(s, axis=0)
b= pd.DataFrame(a)
a= a.where(a>0).fillna(1).astype(int)
hm=sns.heatmap(a, norm=LogNorm(1,triggerMax), cbar_kws={"ticks":[0,1,10,1e2,1e3,1e4,1e5]}, cmap='RdYlGn_r', linewidths=0.0, annot=b, annot_kws={"size": 15, 'color' : 'black'}, fmt="d", ax=ax)
for label in hm.get_yticklabels():
    label.set_size(15)
    label.set_weight("bold")
    label.set_rotation('horizontal')
    if label.get_text() in ["XEUR-FESX", 'XEUR-FDAX']:
        label.set_color("red")
for label in hm.get_xticklabels():
    label.set_size(15)
    label.set_weight("bold")
    label.set_rotation('vertical')
    if label.get_text() in ["XEUR-FESX", 'XEUR-FDAX']:
        label.set_color("red")
In case of questions or comments please do not hesitate to contact us at monitoring@deutsche-boerse.com or Sebastian.Neusuess@deutsche-boerse.com.
© Deutsche Börse 2018 Deutsche Börse AG (DBAG), Clearstream Banking AG (Clearstream), Eurex Frankfurt AG, Eurex Clearing AG (Eurex Clearing) as well as Eurex Bonds GmbH (Eurex Bonds) and Eurex Repo GmbH (Eurex Repo) are corporate entities and are registered under German law. Eurex Zürich AG is a corporate entity and is registered under Swiss law. Clearstream Banking S.A. is a corporate entity and is registered under Luxembourg law. U.S. Exchange Holdings, Inc. is a corporate entity and is registered under U.S. American law. Deutsche Boerse Asia Holding Pte. Ltd., Eurex Clearing Asia Pte. Ltd. and Eurex Exchange Asia Pte. Ltd are corporate entities and are registered under Singapore law. Eurex Frankfurt AG (Eurex) is the administrating and operating institution of Eurex Deutschland. Eurex Deutschland and Eurex Zürich AG are in the following referred to as the “Eurex Exchanges”. All intellectual property, proprietary and other rights and interests in this publication and the subject matter hereof (other than certain trademarks and service marks listed below) are owned by DBAG and its affiliates and subsidiaries including, without limitation, all patent, registered design, copyright, trademark and service mark rights. While reasonable care has been taken in the preparation of this publication to provide details that are accurate and not misleading at the time of publication DBAG, Clearstream, Eurex, Eurex Clearing, Eurex Bonds, Eurex Repo as well as the Eurex Exchanges and their respective servants and agents (a) do not make any representations or warranties regarding the information contained herein, whether express or implied, including without limitation any implied warranty of merchantability or fitness for a particular purpose or any warranty with respect to the accuracy, correctness, quality, completeness or timeliness of such information, and (b) shall not be responsible or liable for any third party’s use of any information contained herein under any circumstances, including, without limitation, in connection with actual trading or otherwise or for any errors or omissions contained in this publication. This publication is published for information purposes only and shall not constitute investment advice respectively does not constitute an offer, solicitation or recommendation to acquire or dispose of any investment or to engage in any other transaction. This publication is not intended for solicitation purposes but only for use as general information. All descriptions, examples and calculations contained in this publication are for illustrative purposes only. Eurex and Eurex Clearing offer services directly to members of the Eurex exchanges respectively to clearing members of Eurex Clearing. Those who desire to trade any products available on the Eurex market or who desire to offer and sell any such products to others or who desire to possess a clearing license of Eurex Clearing in order to participate in the clearing process provided by Eurex Clearing, should consider legal and regulatory requirements of those jurisdictions relevant to them, as well as the risks associated with such products, before doing so. 
Eurex derivatives are currently not available for offer, sale or trading in the United States or by United States persons (other than EURO STOXX 50® Index Futures, EURO STOXX 50® ex Financials Index Futures, EURO STOXX® Select Dividend 30 Index Futures, EURO STOXX® Index Futures, EURO STOXX® Large/Mid/Small Index Futures, STOXX® Europe 50 Index Futures, STOXX® Europe 600 Index Futures, STOXX® Europe 600 Banks/Industrial Goods & Services/Insurance/Media/Travel & Leisure/Utilities Futures, STOXX® Europe Large/Mid/Small 200 Index Futures, Dow Jones Global Titans 50 IndexSM Futures (EUR & USD), DAX®/Mini-DAX®/MDAX®/TecDAX® Futures, SMIM® Futures, SLI Swiss Leader Index® Futures, MSCI World (FMWO, FMWP, FMWN)/Europe (FMEU, FMEP)/ Europe Value/Europe Growth/Emerging Markets (FMEM, FMEF, FMEN)/Emerging Markets Latin America/Emerging Markets EMEA/Emerging Markets Asia/China Free/India/Japan/Malaysia/South Africa/Thailand/AC Asia Pacific ex Japan Index Futures, TA-25 Index Futures, Daily Futures on TAIEX Futures, VSTOXX® Futures, Gold and Silver Futures as well as Eurex FX, property and interest rate derivatives). Trademarks and Service Marks Buxl®, DAX®, DivDAX®, eb.rexx®, Eurex®, Eurex Bonds®, Eurex Repo®, Eurex Strategy WizardSM, Euro GC Pooling®, FDAX®, FWB®, GC Pooling®,,GCPI®, MDAX®, ODAX®, SDAX®, TecDAX®, USD GC Pooling®, VDAX®, VDAX-NEW® and Xetra® are registered trademarks of DBAG. All MSCI indexes are service marks and the exclusive property of MSCI Barra. ATX®, ATX® five, CECE® and RDX® are registered trademarks of Vienna Stock Exchange AG. IPD® UK Annual All Property Index is a registered trademark of Investment Property Databank Ltd. IPD and has been licensed for the use by Eurex for derivatives. SLI®, SMI® and SMIM® are registered trademarks of SIX Swiss Exchange AG. The STOXX® indexes, the data included therein and the trademarks used in the index names are the intellectual property of STOXX Limited and/or its licensors Eurex derivatives based on the STOXX® indexes are in no way sponsored, endorsed, sold or promoted by STOXX and its licensors and neither STOXX nor its licensors shall have any liability with respect thereto. Dow Jones is a service mark of Dow Jones & Company, Inc. All derivatives based on these indexes are not sponsored, endorsed, sold or promoted by Dow Jones & Company, Inc. Dow Jones & Company, Inc. does not make any representation regarding the advisability of trading or of investing in such products. Bloomberg Commodity IndexSM and any related sub-indexes are service marks of Bloomberg L.P. All references to London Gold and Silver Fixing prices are used with the permission of The London Gold Market Fixing Limited as well as The London Silver Market Fixing Limited, which for the avoidance of doubt has no involvement with and accepts no responsibility whatsoever for the underlying product to which the Fixing prices may be referenced. PCS® and Property Claim Services® are registered trademarks of ISO Services, Inc. Korea Exchange, KRX, KOSPI and KOSPI 200 are registered trademarks of Korea Exchange Inc. Taiwan Futures Exchange and TAIFEX are registered trademarks of Taiwan Futures Exchange Corporation. Taiwan Stock Exchange, TWSE and TAIEX are the registered trademarks of Taiwan Stock Exchange Corporation. BSE and SENSEX are trademarks/service marks of Bombay Stock Exchange (BSE) and all rights accruing from the same, statutory or otherwise, wholly vest with BSE. 
Any violation of the above would constitute an offence under the laws of India and international treaties governing the same. The names of other companies and third party products may be trademarks or service marks of their respective owners.
HTML('''Code toggle: click <a href="javascript:code_toggle()">here</a>.''')
# assign true/false depending on eti capture time being there
dfall['hasETITime'] = dfall['ETICaptTime']>0
# group by MarketSegmentID
grouped = dfall.groupby(by=['MarketSegmentID'])
# group by MarketSegmentID & eti existence
groupedeti = dfall.groupby(by=['MarketSegmentID','hasETITime'])
# a gives you the top 20 products by count
a = grouped.size().sort_values(ascending=False).head(n=20)
# b gives you the number of messages with ETICaptTime > 0 by product
b = dfall[dfall['ETICaptTime']>0].groupby('MarketSegmentID').size().sort_values(ascending=False)
# complete new dataframe with the same index as the top 20 products
matchRate = pd.DataFrame(index = a.index, columns=['mr','cnt','eti'])
# a and b have MarketSegmentID as index
print('Top 20 match ratios')
print('MarketSegment Cnt Eti Ratio')
# for each value in matchRate df assign number of messages, number of messages with ETICaptTime > 0 and Ratio
for i, row in matchRate.iterrows():
    row['eti'] = b[i]
    row['cnt'] = a[i]
    row['mr'] = (100.0 * b[i]) / (1.0 * a[i])
    print('%s\t%d\t%d\t%.2f%%' % (i, row['cnt'], row['eti'], row['mr']))