This is an implementation of the Temporal Fusion Transformer network architecture, used here to predict future Bitcoin market prices.
Running gen_csv_year(year, symbol, interval) in DownloadData.ipynb downloads historical price data via the Binance API.
The training set consists of 2018 and 2019; 2020 is used for testing.
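As a point of reference, the sketch below shows how such a helper might request candles from Binance's public klines endpoint. The endpoint, its parameters, and the kline row layout come from the Binance REST API docs linked at the bottom; the function name and dataframe handling are illustrative, not the actual gen_csv_year implementation.
import requests
import pandas as pd

def fetch_klines(symbol="BTCUSDT", interval="5m", start_ms=None, limit=1000):
    #GET /api/v3/klines returns up to 1000 candles per request, each row being
    #[open time, open, high, low, close, volume, close time, ...]
    params = {"symbol": symbol, "interval": interval, "limit": limit}
    if start_ms is not None:
        params["startTime"] = start_ms  #epoch milliseconds
    rows = requests.get("https://api.binance.com/api/v3/klines", params=params).json()
    df = pd.DataFrame([r[:5] for r in rows],
                      columns=["OpenTime", "Open", "High", "Low", "Close"])
    return df.astype({c: float for c in ["Open", "High", "Low", "Close"]})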
Let's start with the necessary imports, including matplotlib for visualisation.
import torch
import torch.nn as nn
from network import *
from data import *
import pandas as pd
import numpy as np
%matplotlib notebook
import matplotlib.pyplot as plt
import math
from mpl_finance import candlestick_ohlc
Next we define which columns are used as continuous and discrete inputs, as well as the prediction targets.
continuous_columns = ['Open', 'High', 'Low', 'Close']
discrete_columns = ['Hour']#, 'Day', 'Month']
target_columns = ['Close']
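The discrete 'Hour' feature takes values 0-23, which is why a cardinality of 24 appears in n_variables_past_discrete below. It is assumed to be derived from each candle's timestamp, along these lines:
#hedged example: deriving the 'Hour' feature (0-23) from a timestamp column;
#the actual extraction is assumed to happen in data.py / DownloadData.ipynb
example = pd.DataFrame({"OpenTime": pd.date_range("2018-01-01", periods=3, freq="5min")})
example["Hour"] = example["OpenTime"].dt.hour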
Load the Bitcoin data into memory.
print("Loading : ")
btc_data = load_data(['2018', '2019'], 'BTCUSDT', continuous_columns, '5m')
btc_test_data = load_data(['2020'], 'BTCUSDT', continuous_columns, interval = '5m')
Loading :
done
done
Next we define the hyperparameters; more details can be found in the Temporal Fusion Transformer paper.
#input data shape
n_variables_past_continuous = 4
n_variables_future_continuous = 0
n_variables_past_discrete = [24]#, 31, 12]
n_variables_future_discrete = [24]#, 31, 12]
#hyperparams
batch_size = 160
test_batch_size = 160
n_tests = 25
dim_model = 160
n_lstm_layers = 4
n_attention_layers = 3
n_heads = 6
quantiles = torch.tensor([0.1, 0.5, 0.9]).float().cuda()
past_seq_len = 80
future_seq_len = 15
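The forward_pass helper used below is assumed to score the three quantile outputs with the quantile (pinball) loss from the TFT paper. A minimal sketch of that loss, where the function name and mean reduction are assumptions:
def quantile_loss(pred, target, quantiles):
    #pred: (batch, seq, n_quantiles), target: (batch, seq)
    err = target.unsqueeze(-1) - pred
    #pinball loss: q * err when under-predicting, (q - 1) * err when over-predicting
    return torch.max(quantiles * err, (quantiles - 1) * err).mean()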
Either load the model from a checkpoint or initialise a new one.
load_model = True
path = "model_100000.pt"
#initialise
t = TFN(n_variables_past_continuous, n_variables_future_continuous,
n_variables_past_discrete, n_variables_future_discrete, dim_model,
n_quantiles = quantiles.shape[0], dropout_r = 0.2,
n_attention_layers = n_attention_layers,n_lstm_layers = n_lstm_layers, n_heads = n_heads).cuda()
optimizer = torch.optim.Adam(t.parameters(), lr=0.0005)
#try to load from checkpoint
if load_model:
checkpoint = torch.load(path)
    t = checkpoint['model_state']
    #rebind the optimizer to the loaded model's parameters before restoring its state,
    #otherwise it would keep updating the freshly initialised (now discarded) model
    optimizer = torch.optim.Adam(t.parameters(), lr=0.0005)
    optimizer.load_state_dict(checkpoint['optimizer_state'])
losses = checkpoint['losses']
test_losses = checkpoint['test_losses']
print("Loaded model from checkpoint")
else:
losses = []
test_losses = []
print("No checkpoint loaded, initialising model")
Loaded model from checkpoint
Define generators for the training and test sets.
btc_gen = get_batches(btc_data, past_seq_len,
future_seq_len, continuous_columns, discrete_columns,
target_columns, batch_size = batch_size)
test_btc_gen = get_batches(btc_test_data, past_seq_len,
future_seq_len, continuous_columns, discrete_columns,
target_columns, batch_size = batch_size, norm = btc_data)
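For orientation, here is a rough sketch of the contract forward_pass (imported from the project modules above) appears to follow, inferred purely from how it is called in this notebook; the batch structure and the model's call signature are assumptions, and it reuses the quantile_loss sketch from above:
def forward_pass_sketch(model, gen, batch_size, quantiles):
    #hypothetical stand-in for the real forward_pass, for illustration only;
    #batch_size mirrors the real signature but is unused here.
    #given_data is assumed to be (past continuous, past discrete, future discrete,
    #targets): given_data[0] is plotted as the input candles and given_data[3]
    #as the target in the training loop below
    given_data = next(gen)
    net_out, vs_weights = model(*given_data[:3])
    loss = quantile_loss(net_out, given_data[3], quantiles)
    return loss, net_out, vs_weights, given_data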
Now let's begin the training process. First we create a figure for data visualisation.
The network is saved periodically, so overfitting is not a major concern: we can look back afterwards and pick the iteration with the best test set performance, as sketched below.
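A minimal sketch of that post-hoc selection, assuming the model_*.pt files written by the training loop below:
import glob

#hedged sketch: pick the saved checkpoint whose recorded test loss was lowest;
#each checkpoint stores the full test-loss history up to the point it was saved
def best_checkpoint(pattern="model_*.pt"):
    return min(glob.glob(pattern), key=lambda p: torch.load(p)['test_losses'][-1])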
fig = plt.figure()
ax = fig.add_subplot(411)
ax1 = fig.add_subplot(412)
ax2 = fig.add_subplot(413)
ax3 = fig.add_subplot(414)
plt.ion()
fig.canvas.draw()
fig.show()
steps = 200000
for e in range(steps):
#run model against test set every 50 batches
if(e % 50 == 0):
t.eval()
m_test_losses = []
for i in range(n_tests):
test_loss,_ , _, _ = forward_pass(t, test_btc_gen, test_batch_size, quantiles)
m_test_losses.append(test_loss.cpu().detach().numpy())
t.train()
test_losses.append(np.array(m_test_losses).mean())
#save model every 400 batches
if(e % 400 == 0):
torch.save({'model_state' : t,
'optimizer_state': optimizer.state_dict(),
'losses' : losses, 'test_losses' : test_losses} , "model_{}.pt".format(len(losses)))
#forward pass
optimizer.zero_grad()
loss, net_out, vs_weights, given_data = forward_pass(t, btc_gen, batch_size, quantiles)
net_out = net_out.cpu().detach()[0]
#backwards pass
losses.append(loss.cpu().detach().numpy())
loss.backward()
optimizer.step()
#loss graphs
fig.tight_layout(pad = 0.1)
ax.clear()
ax.title.set_text("Training loss")
ax.plot(losses[250:])
ax1.clear()
ax1.title.set_text("Test loss")
ax1.plot(test_losses[5:])
    #compare network output and target data
ax2.clear()
ax2.title.set_text("Network output comparison")
c = given_data[0][0].cpu()
a = torch.arange(-past_seq_len, 0).unsqueeze(-1).unsqueeze(-1).float()
c = torch.cat((a,c), dim = 1)
candlestick_ohlc(ax2, c.squeeze(), colorup = "green", colordown = "red")
ax2.plot(net_out[:,0], color = "red")
ax2.plot(net_out[:,1], color = "blue")
ax2.plot(net_out[:,2], color = "red")
ax2.plot(given_data[3].cpu().detach().numpy()[0], label = "target", color = "orange")
#visualise variable selection weights
vs_weights = torch.mean(torch.mean(vs_weights, dim = 0), dim = 0).squeeze()
vs_weights = vs_weights.cpu().detach().numpy()
ax3.clear()
ax3.title.set_text("Variable Selection Weights")
plt.xticks(rotation=-30)
x = ['Open', 'High', 'Low', 'Close', 'Hour']
ax3.bar(x = x, height = vs_weights)
fig.canvas.draw()
del loss
del net_out
del vs_weights
del given_data
    #stop early so this demo cell finishes quickly; remove to train for the full number of steps
    if e >= 2:
        break
The first two graphs show the training and test losses respectively.
The third graph shows the input data in candlestick form, the target data in orange, and the network's median (50% quantile) prediction in blue; the red lines represent the 10% and 90% quantiles.
The final graph shows the variable selection weights, a feature of Temporal Fusion Transformers indicating how much importance the network attributes to each input.
Finally, let's put the network into evaluation mode and visualise some test set comparisons.
#Draw test cases
fig = plt.figure()
axes = []
batch_size_ = 4
for i in range(batch_size_):
axes.append(fig.add_subplot(411 + i))
test_btc_gen = get_batches(btc_test_data, past_seq_len,
future_seq_len, continuous_columns, discrete_columns,
target_columns, batch_size = batch_size_, norm = btc_data)
t.eval()
loss, net_out, vs_weights, given_data = forward_pass(t, test_btc_gen, batch_size_, quantiles)
net_out = net_out.cpu().detach()
for idx, a in enumerate(axes):
a.clear()
c = given_data[0][idx].cpu()
b = torch.arange(-past_seq_len, 0).unsqueeze(-1).unsqueeze(-1).float()
c = torch.cat((b,c), dim = 1)
candlestick_ohlc(a, c.squeeze(), colorup = "green", colordown = "red")
a.plot(net_out[idx][:,0], color = "red")
a.plot(net_out[idx][:,1], color = "blue")
a.plot(net_out[idx][:,2], color = "red")
a.plot(given_data[3].cpu().detach().numpy()[idx], label = "target", color = "orange")
t.train()
plt.ion()
fig.show()
fig.canvas.draw()
Resources:
- Temporal Fusion Transformer : https://arxiv.org/abs/1912.09363
- Binance REST API documentation (used to download the Bitcoin price data) : https://github.com/binance-exchange/binance-official-api-docs/blob/master/rest-api.md