Are Markets Predictable?
Dr Yves J Hilpisch | The AI Machine
For the tpqoa package, see http://github.com/yhilpisch/tpqoa.
import math
import tpqoa
import numpy as np
import pandas as pd
from pylab import plt
import cufflinks as cf
# Apply the 'seaborn' matplotlib style sheet to all subsequent plots.
# NOTE(review): newer matplotlib releases renamed this style to
# 'seaborn-v0_8' -- confirm against the installed version.
plt.style.use('seaborn')
%matplotlib inline
# Configure cufflinks with offline=True (no online plotting account needed,
# presumably -- confirm against the cufflinks docs).
cf.set_config_file(offline=True)
# Seed NumPy's global random generator; np.random.choice is used further
# down to build the 'random' benchmark column.
np.random.seed(1)
# Instrument whose price history is analysed.
symbol = 'EUR_USD'

# Candidate boundary dates; the second and third entries delimit the sample.
dates = ['2019-03-01', '2019-06-01', '2019-09-01']
start, end = dates[1], dates[2]

# Bar granularity and price type forwarded to api.get_history
# (presumably 10-minute bars and ask prices -- confirm against the tpqoa docs).
granularity, price = 'M10', 'A'

# Path of the local CSV cache for exactly this combination of parameters.
fn = f'data/oanda_{symbol}_{start}_{end}_{granularity}_{price}.csv'
fn
%%time
try:
raw = pd.read_csv(fn, index_col=0, parse_dates=True)
except:
api = tpqoa.tpqoa('dyjh.cfg')
raw = api.get_history(symbol, start, end, granularity, price)
raw.to_csv(fn)
raw.info()
# Keep only the 'c' column of the raw data (presumably the close price --
# confirm against the tpqoa docs) and label it with the instrument name.
data = pd.DataFrame(raw['c'])
data.columns = [symbol]

# Log return between consecutive rows.
prices = data[symbol]
data['r'] = np.log(prices / prices.shift(1))

# Binary direction label: 1 for a strictly positive return, 0 otherwise.
data['d'] = np.where(data['r'] > 0, 1, 0)

# Remove rows with missing values; at least the first row qualifies because
# its shifted predecessor does not exist.
data.dropna(inplace=True)

# Benchmark return series: always long, always short, and a random
# long/short position drawn independently for every row.
returns = data['r']
data['long'] = returns
data['short'] = -returns
n_rows = len(data)
data['random'] = np.random.choice([-1, 1], n_rows) * returns
ld = len(data)
ld

# Chronological split: the first 60% of the rows are used for fitting, and
# the last 10% of the data set *within* that window is held out for
# validation. The remaining 40% form the test set.
split = int(ld * 0.6)
val_size = int(ld * 0.1)

# Explicit positional bounds instead of negative slices: with val_size == 0
# the negative-slice form ``x[-0:]`` / ``x[:-0]`` would hand the whole window
# to ``val`` and leave ``train`` empty.
# Every subset is an independent copy because create_lags adds columns and
# drops rows in place; mutating a slice of ``data`` raises pandas'
# SettingWithCopyWarning.
train = data.iloc[:split - val_size].copy()
val = data.iloc[split - val_size:split].copy()
test = data.iloc[split:].copy()
lags = 5


def create_lags(df, mu, std, n_lags=None):
    """Add standardized, lagged return columns to ``df`` in place.

    The column ``df['r']`` is standardized as ``(r - mu) / std`` and stored
    in ``df['rs']``. For every lag ``k`` in ``1..n_lags`` a column
    ``lag_k`` holding ``df['rs']`` shifted by ``k`` rows is added. Rows that
    contain missing values afterwards (at least the first ``n_lags`` rows)
    are dropped from ``df``.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame with a return column ``'r'``; modified in place.
    mu : float
        Value subtracted from the returns.
    std : float
        Value the de-meaned returns are divided by.
    n_lags : int, optional
        Number of lag columns to create. Defaults to the module-level
        ``lags`` setting, which is what existing three-argument calls use.

    Returns
    -------
    None

    Side effects
    ------------
    Rebinds the module-level name ``cols`` to the list of created lag
    column names, which the rest of the script uses to select features.
    """
    global cols
    if n_lags is None:
        n_lags = lags
    df['rs'] = (df['r'] - mu) / std
    cols = [f'lag_{lag}' for lag in range(1, n_lags + 1)]
    for lag, col in enumerate(cols, start=1):
        df[col] = df['rs'].shift(lag)
    # Shifting leaves NaN in the leading rows of every lag column.
    df.dropna(inplace=True)
# Mean and standard deviation of the returns are computed on the training
# set only and then reused unchanged for the validation and test sets.
mu = train['r'].mean()
std = train['r'].std()
# create_lags modifies each frame in place (adds 'rs' and the lag columns,
# drops rows with missing values) and sets the global `cols`.
create_lags(train, mu, std)
train.head(5)
create_lags(val, mu, std)
val.head()
create_lags(test, mu, std)
test.head()
from sklearn.ensemble import BaggingClassifier
from sklearn.tree import DecisionTreeClassifier
# Base learner: a decision tree limited to depth 3 with at least 10 samples
# per leaf.
base_estimator = DecisionTreeClassifier(random_state=1, max_depth=3,
min_samples_leaf=10)
# Bagging ensemble of 100 such trees with bootstrap sampling, the
# out-of-bag score enabled, and 4 parallel jobs.
# NOTE(review): newer scikit-learn releases renamed `base_estimator` to
# `estimator` -- confirm against the installed version.
model = BaggingClassifier(base_estimator=base_estimator,
n_estimators=100,
bootstrap=True,
oob_score=True,
n_jobs=4,
random_state=100)
# Fit on the lag columns of the training set with the direction label 'd'
# as target; the %time magic reports how long the call takes.
%time model.fit(train[cols], train['d'])
# Accuracy of the direction prediction on the test set.
model.score(test[cols], test['d'])  # prediction accuracy

# Turn the predicted labels (0/1) into short/long positions (-1/+1).
predicted = model.predict(test[cols])
test['p'] = np.where(predicted > 0, 1, -1)
test['p'].value_counts()  # positions taken
sum(test['p'].diff() != 0)  # trades necessary

# Strategy log return: position multiplied by the market log return.
test['strategy_bag'] = test['p'] * test['r']

# Cumulated log returns, exponentiated, plotted against the benchmarks.
perf = test[['strategy_bag', 'random', 'short', 'long']].cumsum().apply(np.exp)
perf.iplot(colorscale='rdbu')
from sklearn.neural_network import MLPClassifier
# Multi-layer perceptron with two hidden layers of 192 units each and ReLU
# activation. Early stopping is switched on with validation_fraction=0.1,
# shuffling is switched off, and training is capped at 500 iterations.
model = MLPClassifier(hidden_layer_sizes=(192, 192),
activation='relu',
learning_rate_init=0.0005,
random_state=100,
max_iter=500,
validation_fraction=0.1,
shuffle=False,
early_stopping=True,
verbose=False)
# Fit on the lag columns of the training set with the direction label 'd'
# as target; the %time magic reports how long the call takes.
%time model.fit(train[cols], train['d'])
# Turn the predicted labels (0/1) into short/long positions (-1/+1).
predicted = model.predict(test[cols])
test['p'] = np.where(predicted > 0, 1, -1)
test['p'].value_counts()
sum(test['p'].diff() != 0)

# Strategy log return: position multiplied by the market log return.
test['strategy_mlp'] = test['p'] * test['r']

# Cumulated log returns, exponentiated, plotted next to the bagging
# strategy and the benchmarks.
plot_cols = ['strategy_mlp', 'strategy_bag', 'random', 'short', 'long']
perf = test[plot_cols].cumsum().apply(np.exp)
perf.iplot(colorscale='rdbu')
import tensorflow as tf
# Restrict TensorFlow log output to errors.
# NOTE(review): `tf.logging` and `tf.random.set_random_seed` below look like
# TensorFlow 1.x APIs -- confirm against the installed version.
tf.logging.set_verbosity(tf.logging.ERROR)
from keras.models import Sequential
from keras.layers import Dense
from keras.optimizers import RMSprop
# Fix the NumPy and TensorFlow random seeds before the model is built.
np.random.seed(100)
tf.random.set_random_seed(100)
# RMSprop optimizer with learning rate 0.005.
opt = RMSprop(lr=0.005, rho=0.9, epsilon=None, decay=0.0)
# Sequential network: two dense ReLU layers with 128 units each, taking
# `lags` input features, followed by a single sigmoid output unit.
model = Sequential()
model.add(Dense(128, activation='relu', input_shape=(lags,)))
model.add(Dense(128, activation='relu'))
model.add(Dense(1, activation='sigmoid'))
# Binary cross-entropy loss with accuracy tracked as an extra metric.
model.compile(optimizer=opt, loss='binary_crossentropy', metrics=['acc'])
%%time
# Train for 50 epochs with batch size 32 on the training set; the separate
# validation set is passed so its metrics are recorded as well.
model.fit(train[cols], train['d'],
epochs=50, batch_size=32, verbose=False,
validation_data=(val[cols], val['d']));
# Training history recorded by the model, as a DataFrame for inspection and
# plotting.
res = pd.DataFrame(model.history.history)
res.tail(3)
res.iplot()
# Evaluate the compiled loss and the 'acc' metric on the test set.
model.evaluate(test[cols], test['d'])
# Predicted class labels, then mapped to short/long positions (-1/+1).
# NOTE(review): `predict_classes` is not available in newer Keras releases
# -- confirm against the installed version.
test['p'] = model.predict_classes(test[cols])
test['p'] = np.where(test['p'] > 0, 1, -1)
test['p'].value_counts()
# Number of rows where the position differs from the previous row; the
# first row counts too because its diff is NaN.
sum(test['p'].diff() != 0)
# Strategy log return: position multiplied by the market log return.
test['strategy_dnn'] = test['p'] * test['r']
# All strategy and benchmark return columns to compare.
res_cols = [
    'strategy_dnn',
    'strategy_mlp',
    'strategy_bag',
    'random',
    'short',
    'long',
]

# Summed log returns per column, exponentiated, sorted best to worst.
gross = test[res_cols].sum().apply(np.exp)
r = gross.sort_values(ascending=False)
r

# Difference of every column's result to the always-long benchmark.
r - r['long']

# Cumulated log returns over time, exponentiated, for all columns.
paths = test[res_cols].cumsum().apply(np.exp)
paths.iplot(colorscale='rdbu')