AI in Finance

Are Markets Predictable?

Dr Yves J Hilpisch | The AI Machine

http://aimachine.io | http://twitter.com/dyjh

Imports

For the tpqoa package see http://github.com/yhilpisch/tpqoa.

In [1]:
import math
import tpqoa
import numpy as np
import pandas as pd
from pylab import plt
import cufflinks as cf
plt.style.use('seaborn')
%matplotlib inline
cf.set_config_file(offline=True)
np.random.seed(1)

Data

In [2]:
dates = ['2019-03-01', '2019-06-01', '2019-09-01']
In [3]:
symbol = 'EUR_USD'
start = dates[1]
end = dates[2]
granularity = 'M10'
price = 'A'
fn = f'data/oanda_{symbol}_{start}_{end}_{granularity}_{price}.csv'
fn
Out[3]:
'data/oanda_EUR_USD_2019-06-01_2019-09-01_M10_A.csv'
In [4]:
%%time
try:
    raw = pd.read_csv(fn, index_col=0, parse_dates=True)  # use cached data if available
except FileNotFoundError:
    api = tpqoa.tpqoa('dyjh.cfg')  # credentials are read from the config file
    raw = api.get_history(symbol, start, end, granularity, price)
    raw.to_csv(fn)  # cache the data for subsequent runs
CPU times: user 75.9 ms, sys: 6.35 ms, total: 82.3 ms
Wall time: 116 ms
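If no cached CSV file is found, the data is retrieved via the OANDA API. The tpqoa.tpqoa('dyjh.cfg') call reads the account credentials from a plain-text configuration file; following the tpqoa README, it has roughly this form (all values are placeholders):

[oanda]
account_id = YOUR_ACCOUNT_ID
access_token = YOUR_ACCESS_TOKEN
account_type = practice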
In [5]:
raw.info()
<class 'pandas.core.frame.DataFrame'>
DatetimeIndex: 9358 entries, 2019-06-02 21:00:00 to 2019-08-30 20:50:00
Data columns (total 6 columns):
c           9358 non-null float64
complete    9358 non-null bool
h           9358 non-null float64
l           9358 non-null float64
o           9358 non-null float64
volume      9358 non-null int64
dtypes: bool(1), float64(4), int64(1)
memory usage: 447.8 KB
In [6]:
data = pd.DataFrame(raw['c'])  # work with the closing prices only
data.columns = [symbol]
data['r'] = np.log(data[symbol] / data[symbol].shift(1))  # log returns
data['d'] = np.where(data['r'] > 0, 1, 0)  # direction label (1 = up, 0 = down)
data.dropna(inplace=True)
In [7]:
data['long'] = data['r']  # benchmark: always long
data['short'] = -data['r']  # benchmark: always short
data['random'] = np.random.choice([-1, 1], len(data)) * data['r']  # benchmark: random positions
In [8]:
ld = len(data)
ld
Out[8]:
9357
In [9]:
split = int(ld * 0.6)  # first 60% of the sample (training incl. validation)
val_size = int(ld * 0.1)  # 10% of the sample, carved out of the end of the training slice
In [10]:
train = data.iloc[:split]
val = train[-val_size:]
train = train[:-val_size]
test = data.iloc[split:].copy()
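A quick sanity check of the resulting split sizes (roughly 50% training, 10% validation, and 40% test of the full sample):

len(train), len(val), len(test)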
In [11]:
lags = 5
In [12]:
def create_lags(df, mu, std):
    # normalize the returns with the *training* mean and standard
    # deviation (passed in explicitly to avoid lookahead bias) and
    # add lagged versions of the normalized returns as features
    global cols
    cols = []
    df['rs'] = df['r'] - mu
    df['rs'] /= std
    for lag in range(1, lags + 1):
        col = 'lag_{}'.format(lag)
        df[col] = df['rs'].shift(lag)
        cols.append(col)
    df.dropna(inplace=True)
In [13]:
mu = train['r'].mean()
std = train['r'].std()
create_lags(train, mu, std)
In [14]:
train.head(5)
Out[14]:
EUR_USD r d long short random rs lag_1 lag_2 lag_3 lag_4 lag_5
time
2019-06-02 22:00:00 1.11728 0.000295 1 0.000295 -0.000295 0.000295 1.047357 -0.575584 0.188176 1.015723 -0.893941 -0.161873
2019-06-02 22:10:00 1.11727 -0.000009 0 -0.000009 0.000009 -0.000009 -0.034573 1.047357 -0.575584 0.188176 1.015723 -0.893941
2019-06-02 22:20:00 1.11712 -0.000134 0 -0.000134 0.000134 -0.000134 -0.480046 -0.034573 1.047357 -0.575584 0.188176 1.015723
2019-06-02 22:30:00 1.11714 0.000018 1 0.000018 -0.000018 0.000018 0.060886 -0.480046 -0.034573 1.047357 -0.575584 0.188176
2019-06-02 22:40:00 1.11737 0.000206 1 0.000206 -0.000206 -0.000206 0.729049 0.060886 -0.480046 -0.034573 1.047357 -0.575584
In [15]:
create_lags(val, mu, std)
In [16]:
val.head()
Out[16]:
EUR_USD r d long short random rs lag_1 lag_2 lag_3 lag_4 lag_5
time
2019-07-17 09:50:00 1.12176 -0.000250 0 -0.000250 0.000250 -0.000250 -0.889962 1.866970 -0.066153 0.409342 0.155756 0.472823
2019-07-17 10:00:00 1.12170 -0.000053 0 -0.000053 0.000053 0.000053 -0.192901 -0.889962 1.866970 -0.066153 0.409342 0.155756
2019-07-17 10:10:00 1.12152 -0.000160 0 -0.000160 0.000160 0.000160 -0.573250 -0.192901 -0.889962 1.866970 -0.066153 0.409342
2019-07-17 10:20:00 1.12174 0.000196 1 0.000196 -0.000196 0.000196 0.694501 -0.573250 -0.192901 -0.889962 1.866970 -0.066153
2019-07-17 10:30:00 1.12156 -0.000160 0 -0.000160 0.000160 -0.000160 -0.573230 0.694501 -0.573250 -0.192901 -0.889962 1.866970
In [17]:
create_lags(test, mu, std)
In [18]:
test.head()
Out[18]:
EUR_USD r d long short random rs lag_1 lag_2 lag_3 lag_4 lag_5
time
2019-07-25 21:40:00 1.11484 0.000027 1 0.000027 -0.000027 -0.000027 0.092905 -0.098417 0.252345 -0.098422 -0.321622 0.507442
2019-07-25 21:50:00 1.11489 0.000045 1 0.000045 -0.000045 -0.000045 0.156673 0.092905 -0.098417 0.252345 -0.098422 -0.321622
2019-07-25 22:00:00 1.11480 -0.000081 0 -0.000081 0.000081 0.000081 -0.289734 0.156673 0.092905 -0.098417 0.252345 -0.098422
2019-07-25 22:10:00 1.11472 -0.000072 0 -0.000072 0.000072 0.000072 -0.257867 -0.289734 0.156673 0.092905 -0.098417 0.252345
2019-07-25 22:20:00 1.11475 0.000027 1 0.000027 -0.000027 -0.000027 0.092912 -0.257867 -0.289734 0.156673 0.092905 -0.098417

Bagging

In [19]:
from sklearn.ensemble import BaggingClassifier
from sklearn.tree import DecisionTreeClassifier
In [20]:
base_estimator = DecisionTreeClassifier(random_state=1, max_depth=3,
                                        min_samples_leaf=10)
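For reference (not part of the original analysis), the base decision tree can be fitted and scored on its own; the ensemble below works on clones of it, so this does not affect the bagging results:

base_estimator.fit(train[cols], train['d'])
base_estimator.score(test[cols], test['d'])  # single-tree test accuracy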
In [21]:
model = BaggingClassifier(base_estimator=base_estimator,
                          n_estimators=100,
                          bootstrap=True,
                          oob_score=True,
                          n_jobs=4,
                          random_state=100)
In [22]:
%time model.fit(train[cols], train['d'])
CPU times: user 139 ms, sys: 65.8 ms, total: 205 ms
Wall time: 2.35 s
Out[22]:
BaggingClassifier(base_estimator=DecisionTreeClassifier(class_weight=None, criterion='gini', max_depth=3,
            max_features=None, max_leaf_nodes=None,
            min_impurity_decrease=0.0, min_impurity_split=None,
            min_samples_leaf=10, min_samples_split=2,
            min_weight_fraction_leaf=0.0, presort=False, random_state=1,
            splitter='best'),
         bootstrap=True, bootstrap_features=False, max_features=1.0,
         max_samples=1.0, n_estimators=100, n_jobs=4, oob_score=True,
         random_state=100, verbose=0, warm_start=False)
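Since oob_score=True, the fitted ensemble also reports an out-of-bag accuracy estimate, obtained without touching the test set:

model.oob_score_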
In [23]:
model.score(test[cols], test['d'])  # prediction accuracy
Out[23]:
0.5393258426966292
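To put the roughly 54% into perspective, compare it with the accuracy of a constant majority-class prediction, a simple baseline check:

freq_up = test['d'].mean()  # frequency of upward moves in the test set
max(freq_up, 1 - freq_up)  # accuracy of always predicting the majority class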
In [24]:
test['p'] = model.predict(test[cols])
test['p'] = np.where(test['p'] > 0, 1, -1)  # translate predictions into positions (+1 long, -1 short)
In [25]:
test['p'].value_counts()  # positions taken
Out[25]:
-1    2687
 1    1051
Name: p, dtype: int64
In [26]:
sum(test['p'].diff() != 0)  # trades necessary
Out[26]:
1261
In [27]:
test['strategy_bag'] = test['p'] * test['r']
In [28]:
test[['strategy_bag', 'random', 'short', 'long']].cumsum(
    ).apply(np.exp).iplot(colorscale='rdbu')
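The roughly 1,250 position changes would not be free in practice. A minimal sketch of proportional transaction costs applied to the bagging strategy; the cost level is an assumption for illustration only, and subtracting it from the log returns is an approximation:

tc = 0.00007  # assumed cost per position change (illustrative only)
trades = test['p'].diff().fillna(0) != 0  # bars on which the position changes
(test['strategy_bag'] - trades * tc).cumsum().apply(np.exp).iloc[-1]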

MLP Classifier

In [29]:
from sklearn.neural_network import MLPClassifier
In [30]:
model = MLPClassifier(hidden_layer_sizes=(192, 192),
                      activation='relu',
                      learning_rate_init=0.0005,
                      random_state=100,
                      max_iter=500,
                      validation_fraction=0.1,
                      shuffle=False,
                      early_stopping=True,
                      verbose=False)
In [31]:
%time model.fit(train[cols], train['d'])
CPU times: user 2.1 s, sys: 45.1 ms, total: 2.14 s
Wall time: 1.46 s
Out[31]:
MLPClassifier(activation='relu', alpha=0.0001, batch_size='auto', beta_1=0.9,
       beta_2=0.999, early_stopping=True, epsilon=1e-08,
       hidden_layer_sizes=(192, 192), learning_rate='constant',
       learning_rate_init=0.0005, max_iter=500, momentum=0.9,
       n_iter_no_change=10, nesterovs_momentum=True, power_t=0.5,
       random_state=100, shuffle=False, solver='adam', tol=0.0001,
       validation_fraction=0.1, verbose=False, warm_start=False)
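With early_stopping=True, the classifier holds out validation_fraction=0.1 of the training data and stops once the validation score no longer improves. The fitted attributes report the number of epochs actually run and the best validation accuracy:

model.n_iter_, model.best_validation_score_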
In [32]:
test['p'] = model.predict(test[cols])
test['p'] = np.where(test['p'] > 0, 1, -1)
In [33]:
test['p'].value_counts()
Out[33]:
-1    1951
 1    1787
Name: p, dtype: int64
In [34]:
sum(test['p'].diff() != 0)
Out[34]:
1263
In [35]:
test['strategy_mlp'] = test['p'] * test['r']
In [36]:
test[['strategy_mlp', 'strategy_bag', 'random', 'short', 'long']].cumsum(
    ).apply(np.exp).iplot(colorscale='rdbu')

Keras DNN

In [37]:
import tensorflow as tf
tf.logging.set_verbosity(tf.logging.ERROR)
from keras.models import Sequential
from keras.layers import Dense
from keras.optimizers import RMSprop
Using TensorFlow backend.
In [38]:
np.random.seed(100)
tf.random.set_random_seed(100)
In [39]:
opt = RMSprop(lr=0.005, rho=0.9, epsilon=None, decay=0.0)
In [40]:
model = Sequential()

model.add(Dense(128, activation='relu', input_shape=(lags,)))
model.add(Dense(128, activation='relu'))
model.add(Dense(1, activation='sigmoid'))

model.compile(optimizer=opt, loss='binary_crossentropy', metrics=['acc'])
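The layer structure and parameter counts of the network can be inspected via:

model.summary()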
In [41]:
%%time
model.fit(train[cols], train['d'],
          epochs=50, batch_size=32, verbose=False,
          validation_data=(val[cols], val['d']));
CPU times: user 15.8 s, sys: 2.63 s, total: 18.5 s
Wall time: 10.5 s
Out[41]:
<keras.callbacks.History at 0x1a2916ba20>
In [42]:
res = pd.DataFrame(model.history.history)
In [43]:
res.tail(3)
Out[43]:
val_loss val_acc loss acc
47 0.828585 0.522581 0.630126 0.642704
48 0.836504 0.508602 0.626266 0.636072
49 0.858591 0.509677 0.629465 0.641635
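The gap between the final training accuracy (about 64%) and the validation accuracy (about 51%) suggests that the network overfits the training data:

res[['acc', 'val_acc']].tail(1)  # final epoch: training vs. validation accuracy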
In [44]:
res.iplot()
In [45]:
model.evaluate(test[cols], test['d'])  # returns [loss, accuracy]
3738/3738 [==============================] - 0s 31us/step
Out[45]:
[0.8469630597810046, 0.5171214553396482]
In [46]:
test['p'] = model.predict_classes(test[cols])
test['p'] = np.where(test['p'] > 0, 1, -1)
In [47]:
test['p'].value_counts()
Out[47]:
-1    1966
 1    1772
Name: p, dtype: int64
In [48]:
sum(test['p'].diff() != 0)
Out[48]:
1879
In [49]:
test['strategy_dnn'] = test['p'] * test['r']
In [50]:
res_cols = ['strategy_dnn', 'strategy_mlp', 'strategy_bag',
            'random', 'short', 'long']
In [51]:
r = test[res_cols].sum().apply(np.exp).sort_values(ascending=False)
r
Out[51]:
strategy_dnn    1.037899
strategy_bag    1.037430
strategy_mlp    1.026202
short           1.013878
random          1.009287
long            0.986312
dtype: float64
In [52]:
r - r['long']
Out[52]:
strategy_dnn    0.051587
strategy_bag    0.051118
strategy_mlp    0.039891
short           0.027567
random          0.022975
long            0.000000
dtype: float64
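Gross performance alone says nothing about risk. A quick look at the mean and dispersion of the per-bar log returns of each strategy (illustrative only):

test[res_cols].agg(['mean', 'std'])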
In [53]:
test[res_cols].cumsum().apply(np.exp).iplot(colorscale='rdbu')