Hello all,
Been a long time.
Code computes the Matrix Profile,
then finds the best motif (pattern) and discord (outlier).
Other matches can be produced.
The motif and discord regions of interest are highlighted as green/red rectangles using gfx.
The reference site(s) has examples and tutorials which work as described.
There is opportunity to use GPU and dask (also distributed) to accelerate computations
Ability to stream data and update Matrix Profile on the fly.
Hope this helps start the investigations, since there are many applications and ideas to try
regards
FE.
example display here
AFL file below
_N(pdir = "C:\\Proj\\Stumpy\\"); // proj directory
_N(pjn = "Stumpy"); // project name
_N(pfn = pjn+" (1.0.0)"); // python filename
_N(ctxt = pjn+" (1.0.0)"); // context-this AFL filename
/* DESC
Via chart parameter settings
- Clean signal / normalise
- plot
- plot timeseries estimate
- pyPlot
*/
/* Notes
- enable in python script (amiArgs,amiRtn),
allows amiPy print of AFL inputs and return matrix in interpretation window
- display data & results in python
allows python to save results to file
- full barindex data used
*/
//////////////////////////////////////////////////////////////////////////////
// LOGGING
//PyChangeLoggingOptions("","wsemd");
//////////////////////////////////////////////////////////////////////////////
// PARAMETERS
pSig = ParamTrigger("Determine Matrix Profile","Click");
// pass data --> save to csv/noSave --> find_profile using df
// use csv --> read TS csv --> get_profile using df
// prepare TS--> either read TS, df --> find_profile using df
// quotes --> select col(s) as TS --> find_profile using df
pPass = ParamList("Pass data","Yes|Yes_CSV|Use_CSV|Only_Prepare|Quotes",0);
pNbars = Param("Last N Bars Closing Prices",20,10,1000,1);
pWind = Param("Window Length",32,3,2500,15);
_N(startDate = ParamDate("Start Date","1-1-2021",1));
_N(endDate = ParamDate("End Date","28-7-2022",1));
pChart = ParamToggle("Chart result","N|Y",0);
pHat = ParamToggle("Chart Estimate Signal","N|Y",0);
pAsv = ParamTrigger("Clear StaticVars (Proj)","Click");
pSsv = ParamTrigger("Clear StaticVars (Stock)","Click");
pPsv = ParamToggle("Persistent StaticVars","N|Y",0);
//////////////////////////////////////////////////////////////////////////////
// CONSTANTS
//////////////////////////////////////////////////////////////////////////////
// VARIABLES
bi = BarIndex();
DN = DateNum();
fvb = FirstVisibleValue(bi);
lvb = LastVisibleValue(bi);
lvx = Status("lastvisiblebar"); // barcount-1+blanks
blank = lvx-lvb;
tot = lvb-fvb+1;
//////////////////////////////////////////////////////////////////////////////
// TICKER
// remove Norgate suffix
cnt = StrFind(Name(),".au");
if(cnt!=0) { ticker = StrLeft(Name(),cnt-1); }
else { ticker = Name(); }
// INITIALISE
// removes staticvars from project
if(pAsv) {
StaticVarRemove(pjn+"*");
}
// removes staticvars from stock
if(pSsv) {
StaticVarRemove(ctxt+"_"+ticker+"*");
}
//////////////////////////////////////////////////////////////////////////////
// INPUTS
field = Close;
//////////////////////////////////////////////////////////////////////////////
// lOAD PYTHON SCRIPT
PyLoadFromFile(ctxt,pdir+pfn+".py");
//////////////////////////////////////////////////////////////////////////////
// arrange the data
// pass data --> save to csv/noSave --> find_profile using df
if(pPass=="Yes" OR pPass=="Yes_CSV") {
sz_df = PyEvalFunction(ctxt,"Get_TS",DN,field,ticker);
// optionally, save timeseries as csv
if(sz_df>0 AND pPass=="Yes_CSV") {
PyEvalFunction(ctxt,"Save_TS_csv");
}
}
else if(pPass=="Use_CSV") {
// use csv --> read TS csv --> get_profile using df
PyEvalFunction(ctxt,"Get_TS_csv",ticker);
}
else if(pPass=="Only_Prepare") {
// prepare TS --> either read TS, df --> find_profile using df
// Prepare Data for date range
PyEvalFunction(ctxt,"Prepare_TS",startDate,EndDate);
}
else if(pPass=="Quotes") {
// quotes --> then select col(s) to use as TS --> find_profile using df
PyEvalFunction(ctxt,"Quotes",O,H,L,C,V);
}
//////////////////////////////////////////////////////////////////////////////
//matrix_profile structure:
//mp-col 0 -> motif values
// -col 1 -> idx to nearest (either direction)
// -col 2 -> idx to nearest left side
// -col 3 -> idx to nearest right side
tmp = Null;
if(pSig) { // Find/Get the matrix_profile returned as [matrix]?
Profile = PyEvalFunction(ctxt,"Get_Profile",pWind);
StaticVarSet(ctxt+"_"+ticker+"_mp",Profile,pPsv);
//if(typeof(Profile)=="matrix") {
// get the Motif index, Discord index & and their nearest_neighbour index
motif = PyEvalFunction(ctxt,"Find_Motif",pWind);
motif_idx = motif[0][0];
nearest_motif = motif[1][0];
discord = PyEvalFunction(ctxt,"Find_Discord",pWind);
discord_idx = discord[0][0];
nearest_discord = discord[1][0];
// save the findings
StaticVarSet(ctxt+"_"+ticker+"_motif_idx",motif_idx,pPsv);
StaticVarSet(ctxt+"_"+ticker+"_nearest_motif",nearest_motif,pPsv);
StaticVarSet(ctxt+"_"+ticker+"_discord_idx",discord_idx,pPsv);
StaticVarSet(ctxt+"_"+ticker+"_nearest_discord",nearest_discord,pPsv);
// print the findings
printf("\nThe motif idx is %g",motif_idx);
printf("\nThe nearest motif idx is %g",nearest_motif);
printf("\nThe discord idx is %g",discord_idx);
printf("\nThe nearest discord idx is %g",nearest_discord);
//}
//else { rows = cols = 0; }
}
// get saved matrix profile, use overlay to display motifs & anomolies
mp = StaticVarGet(ctxt+"_"+ticker+"_mp");
rows = MxGetSize(mp,0);;
cols = MxGetSize(mp,1);
motif_idx = StaticVarGet(ctxt+"_"+ticker+"_motif_idx");
nearest_motif = StaticVarGet(ctxt+"_"+ticker+"_nearest_motif");
discord_idx = staticVarGet(ctxt+"_"+ticker+"_discord_idx");
nearest_discord = StaticVarGet(ctxt+"_"+ticker+"_nearest_discord");
//////////////////////////////////////////////////////////////////////////////
// CHARTING
SetChartBkColor(ColorRGB(0,0,0));
SetChartOptions(0,chartShowArrows|chartShowDates);
Plot(C,"C",colorYellow,styleCandle|styleNoTitle,Null,Null,0,0,1);
///////////////////////////////////////////////////////////////////////////////
// Graphics parameters
GfxSetOverlayMode(0); // low-level graphic is overlaid on top of charts
GfxSetCoordsMode(3); // X coordinate is bar index, Y is pixel
GfxSelectPen(colorYellow);
GfxSelectSolidBrush(colorYellow);
pxchartbottom = Status("pxchartbottom");
pxcharttop = Status("pxcharttop");
pxchartheight = Status("pxchartheight");
//---------------------------------------------------------------------------------------
// plot matrix profile
aMp = MxGetBlock(mp,0,rows-1,0,0,True); // matrix profile value as array
miny = Lowest(aMp);
maxy = Highest(aMp);
pxScaleY = (pxchartbottom-pxcharttop-60)/(maxy-miny);
pxMP = pxchartbottom - 0.955*((aMp-miny) * pxScaleY + miny) - 30;
GfxMoveTo(fvb,pxMP[fvb]); // since current cursor is at last visible bar price)
for(i=fvb; i<=lvb; i++) {
if(i>motif_idx AND i<=motif_idx+pWind) { //hightlight motif region
GfxSelectPen(colorBrightGreen,2);
GfxLineTo(i, pxMP[i]);
}
if(i>nearest_motif AND i<=nearest_motif+pWind) { //hightlight nearest motif region
GfxSelectPen(colorBrightGreen,2);
GfxLineTo(i, pxMP[i]);
}
if(i>discord_idx AND i<=discord_idx+pWind) { //hightlight discord region
GfxSelectPen(colorRed,2);
GfxLineTo(i, pxMP[i]);
}
if(i>nearest_discord AND i<=nearest_discord+pWind) { //hightlight nearest discord region
GfxSelectPen(colorRed,2);
GfxLineTo(i, pxMP[i]);
}
GfxSelectPen(colorWhite,1);
GfxLineTo(i, pxMP[i]);
}
GfxSelectPen(colorGreen, 2, 3 );
GfxSetTextAlign(6); GfxSetTextColor(colorGreen);
GfxTextOut("Motif (bi= "+motif_idx+")", motif_idx, pxchartbottom-30);
GfxTextOut("nearest_Motif (bi= "+nearest_motif+")", nearest_motif, pxchartbottom-30);
GfxSelectPen(colorRed, 2, 3 );
GfxSetTextAlign(6); GfxSetTextColor(colorRed);
GfxTextOut("Motif (bi= "+discord_idx+")", discord_idx, pxcharttop+30);
GfxTextOut("nearest_discord (bi= "+nearest_discord+")", nearest_discord, pxcharttop+30);
//---------------------------------------------------------------------------------------
// plot vertical dashed lines/rectangle for motif and nearest bars
GfxSelectPen(colorBrightGreen, 3, 2 );
GfxMoveTo(motif_idx, pxchartbottom-20);
GfxLineTo(motif_idx, pxcharttop+20);
GfxLineTo(motif_idx+pWind, pxcharttop+20);
GfxLineTo(motif_idx+pWind, pxchartbottom-20);
GfxLineTo(motif_idx, pxchartbottom-20);
GfxMoveTo(nearest_motif, pxchartbottom-20);
GfxLineTo(nearest_motif, pxcharttop+20);
GfxLineTo(nearest_motif+pWind, pxcharttop+20);
GfxLineTo(nearest_motif+pWind, pxchartbottom-20);
GfxLineTo(nearest_motif, pxchartbottom-20);
// plot vertical dashed lines for discord and nearest bars
GfxSelectPen(colorRed, 3, 2 );
GfxMoveTo(discord_idx, pxchartbottom-20);
GfxLineTo(discord_idx, pxcharttop+20);
GfxLineTo(discord_idx+pWind, pxcharttop+20);
GfxLineTo(discord_idx+pWind, pxchartbottom-20);
GfxLineTo(discord_idx, pxchartbottom-20);
GfxMoveTo(nearest_discord, pxchartbottom-20);
GfxLineTo(nearest_discord, pxcharttop+20);
GfxLineTo(nearest_discord+pWind, pxcharttop+20);
GfxLineTo(nearest_discord+pWind, pxchartbottom-20);
GfxLineTo(nearest_discord, pxchartbottom-20);
//////////////////////////////////////////////////////////////////////////////
// TITLE
_N(Title = StrFormat("<<"+pdir+pfn+".py"+">> <<Context: "+ctxt+">>"+
"\n{{NAME}} - {{INTERVAL}} {{DATE}},"+
" Open %g, Hi %g, Lo %g, Close %g (%.1f%%)"+
"\nstartDate: "+StartDate+" endDate: "+EndDate+
"\nszRows= %g, szCols= %g",
O, H, L, C, SelectedValue(ROC(C,1)), rows, cols ));
// thanks @beppe
_N(Title = Title+"\nfvb(bi): "+fvb+ " lvb(bi): "+lvb+" bars(bi): "+tot);
_N(Title = Title+"\nlvb(status): "+lvx+" BLANKS: " + blank);
_N(Title = Title+" Barcount: " + BarCount);
_N(Title = Title + StrFormat("\n{{VALUES}}"));
type or paste code here
python code here
# -*- coding: utf-8 -*-
"""
Created on Tue Jul 27 13:51:07 2021
@author: FE
"""
import sys
from numpy.random import f
from pyparsing import col
# proj folder
proj_path = "C:\\Proj\\Stumpy\\"
data_path = proj_path+"data\\"
images_path = proj_path+"images\\"
# data folder
if data_path not in sys.path:
sys.path.append(data_path)
# images folder
if images_path not in sys.path:
sys.path.append(images_path)
import stumpy
import numpy as np
import pandas as pd
from numba import cuda
from numba.cuda.simulator.api import list_devices
import matplotlib.pyplot as plt
import matplotlib.dates as dates
from matplotlib.patches import Rectangle
import datetime as dt
#plt.style.use('https://raw.githubusercontent.com/TDAmeritrade/stumpy/main/docs/stumpy.mplstyle')
plt.style={'figsize': (20, 6), 'xtick.direction' : 'out'}
import AmiPy
global df #each on own line
global df1
global matrix_profile
df = pd.DataFrame()
df1 = pd.DataFrame()
matrix_profile = []
# plot signal and results
dspy = False
# print AmiPy to AFL messages
amimsgs = False
amiArgs = False
amiRtn = True
# plot style - Make the graphs a bit prettier, and bigger
#pd.set_option('display.mpl_style', 'default')
#plt.style={'figsize': (20, 6), 'xtick.direction' : 'out'}
plt.rcParams['figure.figsize'] = (15, 6)
def Sel_From_Quotes(sel):
# select TimeSeries from list of columns which saved as quotes
#TS_Name = df.columns #get the available column names as a list
#colname = TS.Name.iloc(sel) #select the desired column
#todo - able to include multiple dimensions
#return df.array[columns=colname] #return the column from quotes to be the timeseries
pass
def Quotes(Oarr,Harr,Larr,Carr,Varr):
global df
# quotes[OHLCV]
df['Open'] = Oarr
df['High'] = Harr
df['Low'] = Larr
df['Close'] = Carr
df['Volume'] = Varr
#AmiPy.Print( "Quotes to DataFrame success\n" )
# create Quotes [DateTime,O,H,L,C,V] dataframe, with dateTime as index
def Get_TS(DNarr, arrTS, tsLabel):
global df
df = pd.DataFrame()
df.Name = tsLabel
# Use AFL-DateNum: 10000 * (year - 1900) + 100 * month + day
df['year'] = (DNarr/10000 + 1900).astype(int)
df['month'] = (DNarr/100 % 100).astype(int)
df['day'] = (DNarr % 100).astype(int)
# covert to date from columns=year,month,day[hour,minute,second,millisec,microsecond]
df['Date'] = pd.to_datetime(df)
df.drop(columns=['year','month','day'], inplace=True)
df['TS'] = arrTS.astype(np.float64)
return df.shape[0]
# Prepare dataset from timeseries
def Prepare_TS(start_date,end_date, tsLabel):
global mask
global df1
#df1 = pd.DataFrame()
df1.Name = tsLabel
mask = (df['Date'] >= start_date) & (df['Date'] <= end_date)
df1 = df.loc[mask]
df1['Predict'] = 0.0;
df1['Predict'] = 0.0;
df1['Forecast'] = 0.0;
df1['Forecast'] = 0.0;
#if amiRtn:
# AmiPy.Print("\nSeries= "+np.str_(df1.head))
#return df1.shape[0]
# save Timeseries (df.Name exists since Get_TS () has run)
def Save_TS_csv():
global df
df.to_csv(data_path+df.Name+'.csv',index=False) #saves without an index
#return df.shape[0]
# Get Timeseries (needs tsLabel passed by AmiBroker)
def Get_TS_csv(tsLabel):
global df # required since 1st use hasn't bound df
df = pd.read_csv(data_path+tsLabel+'.csv', index_col=0, infer_datetime_format=True) #date is index
df.Name = tsLabel
#return df.shape[0]
# Evaluate - obtain the best fit or anamoly (least fit)
def Find_Motif(wind):
global motif_idx, nearest_motif_distance
# Best match - find the matching motif (smallest value after sort)
motif_idx = np.argsort(matrix_profile[:, 0])[0]
# and the nearest neighbor to this motif has a distance:
nearest_motif_distance = matrix_profile[motif_idx, 1]
#show_Motif(wind,20)
#if amimsgs:
# AmiPy.Print(f"\nThe motif is located at index {motif_idx}")
# AmiPy.Print(f"\nThe nearest neighbor subsequence to this motif is {nearest_motif_distance} units away")
Best = np.array([[motif_idx], [nearest_motif_distance]],dtype='int32')
#if amiRtn:
# AmiPy.Print("\nBest= "+np.str_(Best))
return Best
def Find_Discord(wind):
global discord_idx, nearest_discord_distance
# Least match or Anomoly
discord_idx = np.argsort(matrix_profile[:, 0])[-1]
# and the nearest neighbor to this discord has a distance that is quite far away:
nearest_discord_distance = matrix_profile[discord_idx, 1]
#Show_Anomoly(wind,20)
#if amimsgs:
# AmiPy.Print(f"\nThe discord is located at index {discord_idx}")
# AmiPy.Print(f"\nThe nearest neighbor subsequence to this discord is {nearest_discord_distance} units away")
Least = np.array([[discord_idx], [nearest_discord_distance]],dtype='int32')
#if amiRtn:
# AmiPy.Print("\nLeast= "+np.str_(Least))
return Least
# Visualise the Timeseries, nearest_neighbour & Motif
# rectangle width=wind, rectangle height=20
def show_Motif(m,w=20):
# let’s put all of this together and plot the matrix profile next to our raw data:
fig, axs = plt.subplots(2, sharex=True, gridspec_kw={'hspace': 0})
plt.suptitle('Motif (Pattern) Discovery', fontsize='30')
axs[0].plot(df['TS'].values)
axs[0].set_ylabel('Timeseries', fontsize='20')
rect = Rectangle((motif_idx, 0), m, w, facecolor='lightgrey')
axs[0].add_patch(rect)
rect = Rectangle((nearest_motif_distance, 0), m, w, facecolor='lightgrey')
axs[0].add_patch(rect)
axs[1].set_xlabel('Time', fontsize ='20')
axs[1].set_ylabel('Matrix Profile', fontsize='20')
axs[1].axvline(x=motif_idx, linestyle="dashed")
axs[1].axvline(x=nearest_motif_distance, linestyle="dashed")
axs[1].plot(matrix_profile[:, 0])
plt.show()
# Visualise the Timeseries & Anomoly (discord)
def Show_Anomoly(m,w=20):
# The subsequence located at this global maximum is also referred to as a discord, novelty, or “potential anomaly”:
fig, axs = plt.subplots(2, sharex=True, gridspec_kw={'hspace': 0})
plt.suptitle('Discord (Anomaly/Novelty) Discovery', fontsize='30')
axs[0].plot(df['TS'].values)
axs[0].set_ylabel('Timeseries', fontsize='20')
rect = Rectangle((discord_idx, 0), m, w, facecolor='lightgrey')
axs[0].add_patch(rect)
axs[1].set_xlabel('Time', fontsize ='20')
axs[1].set_ylabel('Matrix Profile', fontsize='20')
axs[1].axvline(x=discord_idx, linestyle="dashed")
axs[1].axvline(x=nearest_discord_distance, linestyle="dashed")
axs[1].plot(matrix_profile[:, 0])
plt.show()
# Find Snippets, to generate a model of the timeseries
def Find_Snippets():
pass
# Check for Timeseries chain
def Check_ATSC():
pass
# Determine Matrix Profile
def Get_Profile(wind):
global df, matrix_profile
#window_size = 32 # which is approximately how many data points might be found in a pattern
wind = np.int_(wind)
matrix_profile = stumpy.stump(df['TS'].array, m=wind)
if amiRtn:
#AmiPy.Print("\n['Date'] Type is "+np.str_(df['Date'].dtype))
AmiPy.Print("\n(nrow,col)= "+np.str_(df.shape))
#AmiPy.Print("\nProfile= "+np.str_(matrix_profile))
return matrix_profile
# Determine Matrix Profile - the model
def Find_Profile(tsLabel, wind):
global df, matrix_profile
df =pd.DataFrame()
# get the csv timeseries, assumes Get_TS already done, so df exists
Get_TS()
#window_size = 50 # Approximately how many data points might be found in a pattern
wind = np.int_(wind)
matrix_profile = stumpy.stump(df['TS'].array, m=wind)
if amiRtn:
AmiPy.Print("\n['Date'] Type is "+np.str_(df['Date'].dtype))
AmiPy.Print("\n(nrow,col)= "+np.str_(df.shape))
AmiPy.Print("\nProfile= "+np.str_(matrix_profile))
return matrix_profile
# Get Matrix Profile using TS(csv) dataset
def Get_Profile(tsLabel, wind):
# get the csv timeseries
df.Name = tsLabel
Get_TS_csv()
wind = np.int_(wind)
matrix_profile = stumpy.stump(df['TS'].array, m=wind)
#if amiRtn:
# AmiPy.Print("\n['Date'] Type is "+np.str_(df['Date'].dtype))
# AmiPy.Print("\n(nrow,col)= "+np.str_(df.shape))
# AmiPy.Print("\nProfile= "+np.str_(matrix_profile))
return matrix_profile