"""finrl-package-usage.ipynb
Automatically generated by Colaboratory.
Original file is located at https://colab.research.google.com/drive/107H08RliYht18Y1J2tAph8UDiGqEGria """
!pip install git+https://github.com/AI4Finance-LLC/FinRL-Library.git
import numpy as np import pandas as pd import matplotlib import matplotlib.pyplot as plt
import datetime
from finrl.config import config from finrl.marketdata.yahoodownloader import YahooDownloader from finrl.preprocessing.preprocessors import FeatureEngineer from finrl.preprocessing.data import data_split from finrl.env.env_stocktrading import StockTradingEnv from finrl.model.models import DRLAgent from finrl.trade.backtest import backtest_stats, backtest_plot, get_daily_return, get_baseline
from pprint import pprint
import sys sys.path.append("../FinRL-Library")
import itertools
import os

# Create the working directories FinRL expects (data, trained models,
# tensorboard logs, results).  exist_ok=True makes each call idempotent and
# avoids the check-then-create race of the original if-not-exists pattern.
for _dir in (
    config.DATA_SAVE_DIR,
    config.TRAINED_MODEL_DIR,
    config.TENSORBOARD_LOG_DIR,
    config.RESULTS_DIR,
):
    os.makedirs("./" + _dir, exist_ok=True)
# Download daily OHLCV bars for all Dow 30 tickers from Yahoo Finance,
# covering 2016-01-01 through 2021-03-17.
df = YahooDownloader(start_date = '2016-01-01', end_date = '2021-03-17', ticker_list = config.DOW_30_TICKER).fetch_data()
# Notebook preview cell: the sorted head() is rendered in Colab, but its
# return value is discarded when this file runs as a plain script.
df.sort_values(['tic','date'],ignore_index=True).head()
def mfi(df, ticker):
    """Compute a money-flow-style acceleration feature for one ticker.

    Parameters
    ----------
    df : pd.DataFrame
        Long-format OHLCV frame with at least the columns
        ['date', 'high', 'low', 'close', 'volume', 'tic'].
    ticker : str
        Ticker symbol to extract from ``df``.

    Returns
    -------
    pd.DataFrame
        Columns ['date', 'close', 'volume', 'del_acc', 'tic'] for the
        requested ticker only.  ``del_acc`` is the day-over-day change of
        (price change / volume change), scaled by 1e5.  Leading rows are
        NaN until the shift/rolling windows fill.
    """
    cols = ['date', 'high', 'low', 'close', 'volume', 'tic']
    # Keep only the requested ticker.  Use a fresh frame (not the passed-in
    # one) so the caller's data is never mutated.
    data = df.loc[df.tic == ticker, cols].reset_index(drop=True)

    # Previous-day values, suffixed "_p", aligned row-by-row via shift(1).
    prev = data[cols].shift(1)
    prev.columns = ['date_p', 'high_p', 'low_p', 'close_p', 'volume_p', 'tic']
    df_merge = pd.concat([data, prev], axis=1)
    print (df_merge)

    # Drop zero-volume rows before computing volume ratios.  .copy() fixes
    # the original's chained assignment into a filtered slice, which raised
    # SettingWithCopyWarning and breaks under pandas copy-on-write.
    df_merge = df_merge[df_merge['volume'] != 0].copy()

    # Rolling baselines, shifted by one so each row only sees past data.
    df_merge['volume_avg'] = df_merge['volume'].rolling(3).mean().shift(1)
    df_merge['high_avg'] = df_merge['high'].rolling(5).mean().shift(1)
    df_merge['low_avg'] = df_merge['low'].rolling(5).mean().shift(1)

    # Day-over-day deltas and "acceleration" = price change per unit of
    # volume change.  NOTE(review): del_volume can be 0, which yields
    # +/-inf or NaN in acc; downstream comparisons treat those rows as
    # "no signal", matching the original behaviour.
    df_merge['del_close'] = df_merge['close'] - df_merge['close_p']
    df_merge['del_volume'] = df_merge['volume'] - df_merge['volume_p']
    df_merge['acc'] = df_merge['del_close'] / df_merge['del_volume']
    df_merge['acc_p'] = df_merge['acc'].shift(1)
    df_merge['del_acc'] = (df_merge['acc'] - df_merge['acc_p'])*10**5

    def flag(x):
        # Buy: price up, volume surge > 3% of the 3-day volume average,
        # acceleration increasing, and close above the 5-day high average.
        if x['del_close'] > 0 and \
           x['del_volume'] / x['volume_avg'] > 0.03 and \
           x['del_acc'] > 0 and \
           x['close'] > x['high_avg']:
            return 'B'
        # Sell: mirror conditions on the downside.
        if x['del_close'] < 0 and \
           x['del_volume'] / x['volume_avg'] > 0.03 and \
           x['del_acc'] < 0 and \
           x['low'] < x['low_avg']:
            return 'S'

    # The B/S flag is computed for inspection only; it is intentionally not
    # part of the returned frame (preserved from the original).
    df_merge['type'] = df_merge.apply(flag, axis=1)

    df_merge = df_merge[["date", "close", "volume", "del_acc"]].reset_index(drop=True)
    df_merge['tic'] = ticker
    print("Final\n", df_merge)
    return df_merge
# Reduce the dataset to a single ticker (Home Depot) with the custom
# del_acc feature computed by mfi() above.
df = mfi(df, 'HD')
# Limit FinRL's built-in indicator list to the two indicators used below.
config.TECHNICAL_INDICATORS_LIST = ['rsi_30', 'close_30_sma']
# FeatureEngineer appends the rsi_30 and close_30_sma columns to the frame.
fe = FeatureEngineer( use_technical_indicator=True, tech_indicator_list = config.TECHNICAL_INDICATORS_LIST, use_turbulence=False, user_defined_feature = False)
processed = fe.preprocess_data(df)
# Build the full (date, ticker) cartesian grid so every calendar day has one
# row per ticker, then keep only dates actually present in the data.
list_ticker = processed["tic"].unique().tolist() list_date = list(pd.date_range(processed['date'].min(),processed['date'].max()).astype(str)) combination = list(itertools.product(list_date,list_ticker))
processed_full = pd.DataFrame(combination,columns=["date","tic"]).merge(processed,on=["date","tic"],how="left") processed_full = processed_full[processed_full['date'].isin(processed['date'])] processed_full = processed_full.sort_values(['date','tic'])
processed_full = processed_full.fillna(0)
# Convert close_30_sma into a relative distance between price and its SMA.
# NOTE(review): fillna(0) above can leave close_30_sma == 0 here, making this
# division produce +/-inf -- consider guarding or computing before fillna.
processed_full['close_30_sma'] = 1 - processed_full['close']/processed_full['close_30_sma'] processed_full = processed_full[['date','tic','close','volume','rsi_30','close_30_sma','del_acc']]
# Notebook preview cell; the head(20) value is discarded in script mode.
processed_full.sort_values(['date','tic'],ignore_index=True).head(20)
# Training window split.  NOTE(review): assumes data_split treats the end
# date as exclusive -- confirm against finrl.preprocessing.data.data_split.
train = data_split(processed_full, '2016-01-01','2020-01-01')
print(len(train))
# Re-declare the indicator list to include the custom del_acc feature so the
# trading environment feeds it into the observation vector.
config.TECHNICAL_INDICATORS_LIST = ['close_30_sma', 'rsi_30', 'del_acc'] print (config.TECHNICAL_INDICATORS_LIST )
# State space sizing: presumably 1 cash slot + price and holdings per stock
# + one slot per indicator per stock (FinRL convention) -- verify against
# StockTradingEnv before changing.
stock_dimension = len(train.tic.unique()) state_space = 1 + 2*stock_dimension + len(config.TECHNICAL_INDICATORS_LIST)*stock_dimension print(f"Stock Dimension: {stock_dimension}, State Space: {state_space}")
# Environment hyperparameters: $50k starting cash, 0.1% buy/sell costs,
# at most 100 shares per order, rewards scaled down by 1e-4.
env_kwargs = { "hmax": 100, "initial_amount": 50000, "buy_cost_pct": 0.001, "sell_cost_pct": 0.001, "state_space": state_space, "stock_dim": stock_dimension, "tech_indicator_list": config.TECHNICAL_INDICATORS_LIST, "action_space": stock_dimension, "reward_scaling": 1e-4
}
e_train_gym = StockTradingEnv(df = train, **env_kwargs)
# Wrap the gym env for Stable-Baselines and show the wrapper type.
env_train, _ = e_train_gym.get_sb_env() print(type(env_train))
# Train a DDPG agent for 50k timesteps.
# NOTE(review): trained_ddpg is never used later in this script -- only the
# PPO model below is evaluated.
agent = DRLAgent(env = env_train) model_ddpg = agent.get_model("ddpg") trained_ddpg = agent.train_model(model=model_ddpg, tb_log_name='ddpg', total_timesteps=50000)
# Train a PPO agent with custom hyperparameters for 50k timesteps; this is
# the model used for out-of-sample trading below.
agent = DRLAgent(env = env_train) PPO_PARAMS = { "n_steps": 2048, "ent_coef": 0.01, "learning_rate": 0.00025, "batch_size": 128, } model_ppo = agent.get_model("ppo",model_kwargs = PPO_PARAMS) trained_ppo = agent.train_model(model=model_ppo, tb_log_name='ppo', total_timesteps=50000)
# Out-of-sample trading window (2020-01-02 .. 2021-03-17), reusing the same
# environment configuration as training.
trade = data_split(processed_full, '2020-01-02','2021-03-17') e_trade_gym = StockTradingEnv(df = trade, **env_kwargs)
# Run the trained PPO policy over the trade window, collecting the daily
# account value and the actions taken.
df_account_value, df_actions = DRLAgent.DRL_prediction( model=trained_ppo, environment = e_trade_gym)
# Notebook preview cells: displayed in Colab, discarded in a plain script.
df_account_value.tail()
df_actions
# Persist the strategy's backtest statistics, then compare against a
# buy-and-hold baseline on the same ticker over the trade window.
print("==============Get Backtest Results===========")
now = datetime.datetime.now().strftime('%Y%m%d-%Hh%M')

perf_stats_all = backtest_stats(account_value=df_account_value)
perf_stats_all = pd.DataFrame(perf_stats_all)
# Timestamped CSV so repeated runs do not overwrite earlier results.
perf_stats_all.to_csv("./"+config.RESULTS_DIR+"/perf_stats_all_"+now+'.csv')

# Baseline stats: buy-and-hold HD over the same trading period.
print("==============Get Baseline Stats===========")
baseline_df = get_baseline( ticker="HD", start = '2020-01-02', end = '2021-03-17')
stats = backtest_stats(baseline_df, value_col_name = 'close')

# Fix: the original banner said "Compare to DJIA", but the baseline plotted
# below is HD itself, not the Dow index.
print("==============Compare to HD baseline===========")
backtest_plot(df_account_value, baseline_ticker = 'HD', baseline_start = '2020-01-02', baseline_end = '2021-03-17')