Simple Neural Networks in Python

© Dr. Yves J. Hilpisch

The Python Quants GmbH

In [1]:
import numpy as np
import pandas as pd
from pylab import plt
plt.style.use('seaborn')
from pandas_datareader import data as web
%matplotlib inline

Raw Data & Features

In [2]:
h5 = pd.HDFStore('../live/data/equities.h5', 'r')
In [3]:
df = h5['data']
In [4]:
h5.close()
In [5]:
df.info()
<class 'pandas.core.frame.DataFrame'>
DatetimeIndex: 4351 entries, 2000-01-03 to 2017-04-19
Data columns (total 7 columns):
AAPL      4351 non-null float64
MSFT      4351 non-null float64
YHOO      4351 non-null float64
^GDAXI    4297 non-null float64
^GSPC     4351 non-null float64
^VIX      4351 non-null float64
^SSMI     4274 non-null float64
dtypes: float64(7)
memory usage: 271.9 KB
In [6]:
raw = pd.DataFrame(df['^GDAXI'])
raw = pd.DataFrame(web.DataReader('DAX', data_source='google')['Close'])
In [7]:
raw.columns = ['prices']
In [8]:
raw.plot(figsize=(10, 6));
In [9]:
raw['log_rets'] = np.log(raw['prices'] / raw['prices'].shift(1))
raw.dropna(inplace=True)
raw['returns'] = np.where(raw['log_rets'] > 0, 1, 0)
In [10]:
lags = 3
In [11]:
cols = []
for lag in range(1, lags+1):
    col = 'lag_%d' % lag
    raw[col] = raw['log_rets'].shift(lag)
    cols.append(col)
In [12]:
raw['SMA'] = raw['prices'].rolling(10).mean()
raw['trend'] = ((raw['prices'] - raw['SMA']) > 0).astype(int).shift(1)
In [13]:
raw.head()
Out[13]:
prices log_rets returns lag_1 lag_2 lag_3 SMA trend
Date
2000-01-04 6586.950195 -0.024565 0 NaN NaN NaN NaN NaN
2000-01-05 6502.069824 -0.012970 0 -0.024565 NaN NaN NaN 0.0
2000-01-06 6474.919922 -0.004184 0 -0.012970 -0.024565 NaN NaN 0.0
2000-01-07 6780.959961 0.046182 1 -0.004184 -0.012970 -0.024565 NaN 0.0
2000-01-10 6925.520020 0.021094 1 0.046182 -0.004184 -0.012970 NaN 0.0
In [14]:
mean = raw['log_rets'].mean()
std = raw['log_rets'].std()
In [15]:
def buckets(x):
    bounds = [mean-std, 0, mean, mean+std]
    for i, b in enumerate(sorted(bounds)):
        if x < b: return i
    else: return i + 1
In [16]:
for _ in cols:
    raw[_] = raw[_].apply(buckets)
In [17]:
cols.append('trend')
lags +=1
In [18]:
raw.head()
Out[18]:
prices log_rets returns lag_1 lag_2 lag_3 SMA trend
Date
2000-01-04 6586.950195 -0.024565 0 4 4 4 NaN NaN
2000-01-05 6502.069824 -0.012970 0 0 4 4 NaN 0.0
2000-01-06 6474.919922 -0.004184 0 1 0 4 NaN 0.0
2000-01-07 6780.959961 0.046182 1 1 1 0 NaN 0.0
2000-01-10 6925.520020 0.021094 1 4 1 1 NaN 0.0

Neural Network Training — Small Data Set

In [19]:
data = raw.dropna().ix[-10:].copy()
In [20]:
data
Out[20]:
prices log_rets returns lag_1 lag_2 lag_3 SMA trend
Date
2017-04-03 12257.200195 -0.004532 0 3 3 3 12114.518945 1.0
2017-04-04 12282.339844 0.002049 1 1 3 3 12146.539941 1.0
2017-04-05 12217.540039 -0.005290 0 3 1 3 12177.881934 1.0
2017-04-06 12230.889648 0.001092 1 1 3 1 12197.002930 1.0
2017-04-07 12225.059570 -0.000477 0 3 1 3 12213.081934 1.0
2017-04-10 12200.519531 -0.002009 0 1 3 1 12233.526855 1.0
2017-04-11 12139.349609 -0.005026 0 1 1 3 12232.519824 0.0
2017-04-12 12154.700195 0.001264 1 1 1 1 12227.689844 0.0
2017-04-13 12109.000000 -0.003767 0 3 1 1 12212.946875 0.0
2017-04-19 12016.450195 0.001333 1 1 3 1 12183.304883 0.0
In [21]:
x = data[cols].values
x.astype(int)
Out[21]:
array([[3, 3, 3, 1],
       [1, 3, 3, 1],
       [3, 1, 3, 1],
       [1, 3, 1, 1],
       [3, 1, 3, 1],
       [1, 3, 1, 1],
       [1, 1, 3, 0],
       [1, 1, 1, 0],
       [3, 1, 1, 0],
       [1, 3, 1, 0]])
In [22]:
y = data['returns'].values.reshape((1, -1)).T
y
Out[22]:
array([[0],
       [1],
       [0],
       [1],
       [0],
       [0],
       [0],
       [1],
       [0],
       [1]])
In [23]:
# initialize weights randomly with mean 0
np.random.seed(1)
weights = 2 * np.random.random((lags, 1)) - 1
weights
Out[23]:
array([[-0.16595599],
       [ 0.44064899],
       [-0.99977125],
       [-0.39533485]])
In [24]:
# sigmoid function
def sigmoid(x, deriv=False):
    if deriv == True:
        return sigmoid(x) * (1 - sigmoid(x))
    return 1 / (1 + np.exp(-x))
In [25]:
for _ in range(201):
    # forward propagation
    # layer 1
    l1 = sigmoid(np.dot(x, weights))

    # errors of layer 1
    e = y - l1
    if _ % 200 == 0:
        print('\nafter %d iterations' % _)
        print('MSE:    ', (e ** 2).mean())

    # multiply errors by the slope of the 
    # sigmoid at the values in l1
    d = e * sigmoid(l1, True)

    # update weights
    weights += np.dot(x.T, d)
after 0 iterations
MSE:     0.206736958195

after 200 iterations
MSE:     0.156683860336
In [26]:
print(l1.round().astype(int).T)
print(y.T)
[[0 0 0 1 0 1 0 0 0 1]]
[[0 1 0 1 0 0 0 1 0 1]]
In [27]:
l1.round() == y
Out[27]:
array([[ True],
       [False],
       [ True],
       [ True],
       [ True],
       [False],
       [ True],
       [False],
       [ True],
       [ True]], dtype=bool)

Neural Network Training — Bigger Data Set

Training

In [28]:
cutoff = '2009-1-1'
In [29]:
data = raw[raw.index < cutoff].dropna().copy()
In [30]:
# initialize weights randomly with mean 0
np.random.seed(1)
weights = 2 * np.random.random((lags, 1)) - 1
weights
Out[30]:
array([[-0.16595599],
       [ 0.44064899],
       [-0.99977125],
       [-0.39533485]])
In [31]:
x = data[cols].values
In [32]:
y = data['returns'].values.reshape((1, -1)).T
In [33]:
for _ in range(1001):
    # forward propagation
    # layer 1
    l1 = sigmoid(np.dot(x, weights))

    # errors of layer 1
    e = y - l1
    if _ % 200 == 0:
        print('\nafter %d iterations' % _)
        print('MSE:    ', (e ** 2).mean())

    # multiply errors by the slope of the 
    # sigmoid at the values in l1
    d = e * sigmoid(l1, True)

    # update weights
    weights += 0.001 * np.dot(x.T, d)
after 0 iterations
MSE:     0.374704210808

after 200 iterations
MSE:     0.249062883733

after 400 iterations
MSE:     0.249062112447

after 600 iterations
MSE:     0.249062112072

after 800 iterations
MSE:     0.249062112069

after 1000 iterations
MSE:     0.249062112069
In [34]:
print(l1.round().astype(int).T)
print(y.T)
[[0 1 1 ..., 0 0 1]]
[[0 1 1 ..., 0 1 1]]
In [35]:
sum(y) / len(y)
Out[35]:
array([ 0.51916058])
In [36]:
sum(l1.round() == y) / len(data)
Out[36]:
array([ 0.51551095])
In [37]:
data['pred'] = np.where(l1.round() > 0, 1, -1)
In [38]:
data['strategy'] = data['log_rets'] * data['pred']
In [39]:
data.head()
Out[39]:
prices log_rets returns lag_1 lag_2 lag_3 SMA trend pred strategy
Date
2000-01-18 7072.120117 -0.014194 0 4 3 3 6827.580029 0.0 -1 0.014194
2000-01-19 7091.040039 0.002672 1 1 4 3 6877.989014 1.0 1 0.002672
2000-01-20 7112.660156 0.003044 1 3 1 4 6939.048047 1.0 1 0.003044
2000-01-21 6992.750000 -0.017002 0 3 3 1 6990.831055 1.0 1 -0.017002
2000-01-24 6931.990234 -0.008727 0 0 3 3 7005.934082 1.0 1 -0.008727
In [40]:
data[['log_rets', 'strategy']].cumsum().apply(np.exp).plot(figsize=(10, 6))
Out[40]:
<matplotlib.axes._subplots.AxesSubplot at 0x111ee8390>

Testing

In [41]:
weights
Out[41]:
array([[-0.03934092],
       [-0.01149524],
       [-0.01077916],
       [ 0.27719233]])
In [42]:
data = raw[raw.index > cutoff].dropna().copy()
In [43]:
x = data[cols].values
In [44]:
l1 = sigmoid(np.dot(x, weights))
In [45]:
l1.round()
Out[45]:
array([[ 1.],
       [ 1.],
       [ 1.],
       ..., 
       [ 0.],
       [ 0.],
       [ 0.]])
In [46]:
y = data['returns'].values.reshape((1, -1)).T
In [47]:
y
Out[47]:
array([[1],
       [1],
       [0],
       ..., 
       [1],
       [0],
       [1]])
In [48]:
sum(y) / len(y)
Out[48]:
array([ 0.5383488])
In [49]:
sum(l1.round() == y) / len(data)
Out[49]:
array([ 0.4963361])
In [50]:
data['pred'] = np.where(l1.round() > 0, 1, -1)
In [51]:
data['strategy'] = data['log_rets'] * data['pred']
In [52]:
data.head()
Out[52]:
prices log_rets returns lag_1 lag_2 lag_3 SMA trend pred strategy
Date
2009-01-05 4983.990234 0.002194 1 4 4 2 4718.768994 1.0 1 0.002194
2009-01-06 5026.310059 0.008455 1 3 4 4 4750.562012 1.0 1 0.008455
2009-01-07 4937.470215 -0.017833 0 3 3 4 4768.669043 1.0 1 -0.017833
2009-01-08 4879.910156 -0.011726 0 0 3 3 4786.990039 1.0 1 -0.011726
2009-01-09 4783.890137 -0.019873 0 1 0 3 4801.477051 1.0 1 -0.019873
In [53]:
data[['log_rets', 'strategy']].cumsum().apply(np.exp).plot(figsize=(10, 6))
Out[53]:
<matplotlib.axes._subplots.AxesSubplot at 0x11206d9b0>