— A Machine Learning Perspective
WORK IN PROGRESS
Dr Yves J Hilpisch
CEO The Python Quants | The AI Machine
Let $t=0$ denote the current point in time and let $t \in \{\ldots, -3, -2, -1, 0, 1, 2, 3, \ldots \}$ denote the relevant previous, current and future points in time.
Let $P_t$ denote the price of a financial instrument at time $t$. We have
$$P \in \{\ldots, P_{t=-2}, P_{t=-1}, P_{t=0}, P_{t=1}, \ldots \}$$
or, for short,
$$P \in \{\ldots, P_{-2}, P_{-1}, P_{0}, P_{1}, \ldots \}.$$
From Wikipedia (https://en.wikipedia.org/wiki/Markov_chain):
A Markov chain or Markov process is a stochastic model describing a sequence of possible events in which the probability of each event depends only on the state attained in the previous event.
To simplify things, let's assume that only the direction $d_t$ of the change in price at time $t$ is relevant.
$$d_t = \begin{cases} 1 \quad\textrm{if}\quad P_t - P_{t-1} > 0 \\ 0 \quad\textrm{if}\quad P_t - P_{t-1} \leq 0 \end{cases}$$
This gives the process of directional changes $d \in \{\ldots, d_{-2}, d_{-1}, d_{0}, d_{1}, \ldots \}$. The problem we want to focus on is whether we can predict the future directional change in price $d_1$ when we know the history of price changes $d_0, d_{-1}, d_{-2}, d_{-3}, \ldots$. Taking into account, say, five historical directional changes, a total of $2^5=32$ patterns can emerge.
from numpy.random import default_rng
rng = default_rng()
rng.integers(0, 2, 5)  # an example pattern of five directional changes, drawn at random
array([1, 0, 1, 0, 0])
rng.integers(0, 2, 5)
array([1, 0, 0, 1, 0])
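For illustration, a minimal sketch of how the directional changes $d_t$ can be derived from a price series — the price values used here are made up for the example.
import pandas as pd

# hypothetical price series (made-up values, for illustration only)
P = pd.Series([100.0, 100.5, 100.2, 100.9, 101.3, 101.1, 101.4])

# d_t = 1 if the price went up, 0 otherwise (as defined above)
d = (P.diff() > 0).astype(int).iloc[1:]

# the five most recent directional changes form one of the 2 ** 5 = 32 patterns
print(tuple(d.iloc[-5:]))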
We are now interested in estimating — by whatever means — probabilities for $d_1 = 1$ and $d_1 = 0$ for any historical pattern $h = (d_0, d_{-1}, d_{-2}, d_{-3}, d_{-4})$.
Formally, the following conditional probabilities are of interest:
$$\begin{cases} \mathbf{P}(d_1=1|h) &=& p \\ \mathbf{P}(d_1=0|h) &=& 1-p \end{cases}$$
Such a problem is typically called a binary classification problem: given a history $h$, one of two classes is to be picked — namely the one for which the higher probability has been estimated. Classification is a special type of pattern recognition and falls into the category of supervised learning algorithms in machine learning (ML).
From Wikipedia (https://en.wikipedia.org/wiki/Predictability):
Predictability is the degree to which a correct prediction or forecast of a system's state can be made either qualitatively or quantitatively.
From Google (https://developers.google.com/machine-learning/crash-course/classification/accuracy):
Accuracy is one metric for evaluating classification models. Informally, accuracy is the fraction of predictions our model got right.
Formally, we have for the accuracy:
$$\text{accuracy} = \frac{\text{# correct predictions}}{\text{# all predictions}}$$
We say that a process of directional price changes is predictable if the accuracy of the predictions is significantly higher than 50%.
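As a minimal sketch of the accuracy metric — the true directions and predictions below are made-up values for illustration only:
import numpy as np

# hypothetical true directions and model predictions (illustrative values)
y_true = np.array([1, 0, 1, 1, 0, 1, 0, 0])
y_pred = np.array([1, 0, 0, 1, 0, 1, 1, 0])

accuracy = (y_true == y_pred).mean()  # fraction of correct predictions
print(accuracy)  # 0.75 here; values significantly above 0.5 would indicate predictability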
Being able to consistently predict financial prices or directional changes in financial markets well can be considered the holy grail in finance.
Why? Plain and simple: because it would be a sure path to tremendous riches.
A simulation analysis with real-world data illustrates the point.
The following example is taken from chapter 14 of Hilpisch (2020): Artificial Intelligence in Finance, O'Reilly. First, some imports and the data preparation.
import random
import numpy as np
import pandas as pd
import cufflinks as cf
cf.set_config_file(offline=True, theme='ggplot')
url = 'https://hilpisch.com/aiif_eikon_eod_data.csv'
raw = pd.read_csv(url, index_col=0, parse_dates=True)
We pick the EUR/USD currency pair for the analysis, with data from 2015 to 2019. bull stands for a long-only strategy.
symbol = 'EUR='
raw['bull'] = np.log(raw[symbol] / raw[symbol].shift(1))  # log returns = long-only strategy
data = pd.DataFrame(raw['bull']).loc['2015-01-01':]
data.dropna(inplace=True)
In addition to the bull strategy, a random one, going long and short randomly, is generated. Also a bear strategy, going short only, is added.
rng = default_rng(100)  # seeded generator for reproducibility
data['random'] = rng.choice([-1, 1], len(data)) * data['bull']  # random long/short positions
data['bear'] = -data['bull']  # short-only strategy
Assume now that a prediction model gets the top t (as a fraction, e.g., t = 0.1 for 10%) of both the positive and negative movements (days) correct. Assume further that for the rest it is not better than a random strategy. The following code creates the return series for such strategies.
def top(t):
    top = pd.DataFrame(data['bull'])
    top.columns = ['top']
    top = top.sort_values('top')
    n = int(len(data) * t)
    # the n largest negative and n largest positive moves are predicted correctly
    top['top'] = abs(top['top'])
    # for all other days, the direction is chosen at random
    top['top'].iloc[n:-n] = rng.choice([-1, 1],
                len(top['top'].iloc[n:-n])) * top['top'].iloc[n:-n]
    data[f'{int(t * 100)}_top'] = top.sort_index()

for t in [0.1, 0.15]:
    top(t)
Assume now that the prediction model gets a certain ratio of all directional movements correct and that it is not better than a random strategy for the rest.
def afi(ratio):
    # a day is predicted correctly with probability ratio ...
    correct = rng.binomial(1, ratio, len(data))
    # ... otherwise the long/short position is random
    random = rng.choice([-1, 1], len(data))
    strat = np.where(correct, abs(data['bull']), random * data['bull'])
    data[f'{int(ratio * 100)}_afi'] = strat

for ratio in [0.51, 0.6, 0.75, 0.9]:
    afi(ratio)
The following shows the first rows of the returns series of the different strategies.
print(data.head())
                bull    random      bear    10_top    15_top    51_afi  \
Date
2015-01-01  0.000413  0.000413 -0.000413  0.000413  0.000413  0.000413
2015-01-02 -0.008464 -0.008464  0.008464  0.008464  0.008464  0.008464
2015-01-05 -0.005767  0.005767  0.005767 -0.005767  0.005767  0.005767
2015-01-06 -0.003611 -0.003611  0.003611 -0.003611 -0.003611 -0.003611
2015-01-07 -0.004299  0.004299  0.004299  0.004299  0.004299  0.004299

              60_afi    75_afi    90_afi
Date
2015-01-01  0.000413  0.000413  0.000413
2015-01-02  0.008464  0.008464  0.008464
2015-01-05 -0.005767  0.005767  0.005767
2015-01-06  0.003611  0.003611 -0.003611
2015-01-07 -0.004299  0.004299  0.004299
This translates into the following gross performance values; throughout, zero transaction costs are assumed. Since the columns contain log returns, summing them up and applying the exponential function yields the gross performance over the full period.
data.sum().apply(np.exp)
bull       0.926676
random     1.245684
bear       1.079126
10_top    12.322428
15_top    23.343766
51_afi    18.627366
60_afi    16.950628
75_afi    43.611802
90_afi    90.892721
dtype: float64
This translates into the following gross performance values over time, again assuming zero transaction costs.
data.cumsum().apply(np.exp).iplot(colorscale='reds')
From Wikipedia (https://en.wikipedia.org/wiki/Stock_market_prediction):
Stock market prediction is the act of trying to determine the future value of a company stock or other financial instrument traded on an exchange. The successful prediction of a stock's future price could yield significant profit. The efficient-market hypothesis suggests that stock prices reflect all currently available information and any price changes that are not based on newly revealed information thus are inherently unpredictable. Others disagree and those with this viewpoint possess myriad methods and technologies which purportedly allow them to gain future price information.
Over time, financial practitioners have used a plethora of methods in their effort to predict financial markets:
Technical analysis methods are still quite popular and intuitively appealing. One of the simplest examples is the use of two simple moving averages (SMAs) to decide whether to go long or short a financial instrument.
data = pd.DataFrame(raw[symbol])
data['SMA1'] = data[symbol].rolling(42).mean()   # shorter/faster SMA (42 days)
data['SMA2'] = data[symbol].rolling(252).mean()  # longer/slower SMA (252 days)
data['POSITION'] = np.where(data['SMA1'] > data['SMA2'], 1, -1)  # +1 = long, -1 = short
The idea here is to go long when the shorter/faster SMA is above the longer/slower SMA and short otherwise.
data.iloc[252:].iplot(secondary_y='POSITION')
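A minimal sketch of how the position signal could be turned into vectorized strategy returns — this backtest step is an illustration added here (no transaction costs, objects from the cells above assumed):
data['RETURN'] = np.log(data[symbol] / data[symbol].shift(1))  # log returns of the instrument

# yesterday's position earns today's return (avoids foresight bias)
data['STRATEGY'] = data['POSITION'].shift(1) * data['RETURN']

# gross performance of the instrument vs. the SMA crossover strategy
print(data[['RETURN', 'STRATEGY']].sum().apply(np.exp))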
Other technical analysis methods are based on certain price formations — or, more generally, patterns in prices (figure source: Reddit).
Several stylized facts about financial markets show stable, non-zero correlations over time.
Take the case of the correlation between the S&P 500 (.SPX) and its volatility index VIX (.VIX) — (1).
data = raw[['.SPX', '.VIX']]
rets = np.log(data / data.shift(1)).dropna()
print(rets.corr())
          .SPX      .VIX
.SPX  1.000000 -0.809235
.VIX -0.809235  1.000000
Take the case of the correlation between the S&P 500 (.SPX) and its volatility index VIX (.VIX) — (2).
rets.iplot(kind='scatter', x='.SPX', y='.VIX', mode='markers',
           size=4, bestfit=True, xTitle='.SPX', yTitle='.VIX')
Take the case of the correlation between the S&P 500 (.SPX) and its volatility index VIX (.VIX) — (3).
rets['.SPX'].rolling(252).corr(rets['.VIX']).dropna().iplot(bestfit=True)
However, neither index is a leading indicator (cause) for the other — (1).
rets['.SPX'].corr(rets['.VIX'].shift(1))
0.013860922375245097
rets['.SPX'].rolling(252).corr(rets['.VIX'].shift(1)).dropna().iplot(bestfit=True)
However, neither index is a leading indicator (cause) for the other — (2).
rets['.VIX'].corr(rets['.SPX'].shift(1))
0.06536531043304598
rets['.VIX'].rolling(252).corr(rets['.SPX'].shift(1)).dropna().iplot(bestfit=True)
Finance has gone through multiple phases and paradigm shifts over time:
Eugene F. Fama (1965): "Random Walks in Stock Market Prices":
For many years, economists, statisticians, and teachers of finance have been interested in developing and testing models of stock price behavior. One important model that has evolved from this research is the theory of random walks. This theory casts serious doubt on many other methods for describing and predicting stock price behavior—methods that have considerable popularity outside the academic world. For example, we shall see later that, if the random-walk theory is an accurate description of reality, then the various "technical" or "chartist" procedures for predicting stock prices are completely without value.
Michael Jensen (1978): "Some Anomalous Evidence Regarding Market Efficiency":
A market is efficient with respect to an information set S if it is impossible to make economic profits by trading on the basis of information set S.
From Investopedia (see Forms of Market Efficiency):
- The weak form suggests today’s stock prices reflect all the data of past prices and that no form of technical analysis can aid investors.
- The semi-strong form submits that because public information is part of a stock's current price, investors cannot utilize either technical or fundamental analysis, though information not available to the public can help investors.
- The strong form version states that all information, public and not public, is completely accounted for in current stock prices, and no type of information can give an investor an advantage on the market.
John Hull (2012): Options, Futures, and Other Derivatives.
The Markov property of stock prices is consistent with the weak form of market efficiency. This states that the present price of a stock impounds all the information contained in a record of past prices. If the weak form of market efficiency were not true, technical analysts could make above-average returns by interpreting charts of the past history of stock prices. There is very little evidence that they are in fact able to do this.
If a stock price follows a (simple) random walk (no drift & normally distributed returns), then it rises and falls with the same probability of 50% ("toss of a coin").
In such a case, the best predictor of tomorrow’s stock price — in a least-squares sense — is today’s stock price.
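A minimal sketch illustrating the "toss of a coin" point by simulation; the number of steps and the seed are arbitrary choices for this illustration.
import numpy as np

rng_rw = np.random.default_rng(0)

# driftless random walk with normally distributed steps
walk = 100 + rng_rw.standard_normal(10_000).cumsum()

up = np.diff(walk) > 0
print(up.mean())  # fraction of up-moves is close to 50% ...

# ... and yesterday's direction carries (almost) no information about today's
print(np.corrcoef(up[:-1], up[1:])[0, 1])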
A practical example based on OLS regression (1).
url = 'http://hilpisch.com/pyalgo_eikon_eod_data.csv'
raw = pd.read_csv(url, index_col=0, parse_dates=True).dropna()
raw.iloc[:5, :5]
| Date       | AAPL.O    | MSFT.O | INTC.O | AMZN.O | GS.N   |
|------------|-----------|--------|--------|--------|--------|
| 2010-01-04 | 30.572827 | 30.950 | 20.88  | 133.90 | 173.08 |
| 2010-01-05 | 30.625684 | 30.960 | 20.87  | 134.69 | 176.14 |
| 2010-01-06 | 30.138541 | 30.770 | 20.80  | 132.25 | 174.26 |
| 2010-01-07 | 30.082827 | 30.452 | 20.60  | 130.00 | 177.67 |
| 2010-01-08 | 30.282827 | 30.660 | 20.83  | 133.52 | 174.31 |
A practical example based on OLS regression (2).
raw['RANDOM'] = 100  # random walk starting at a level of 100
raw['RANDOM'].iloc[1:] += np.random.standard_normal(len(raw) - 1).cumsum()
raw[['GS.N', 'RANDOM']].normalize().iplot();  # normalized comparison of the real and the random series
A practical example based on OLS regression (3).
symbol = 'RANDOM'
symbol = 'GS.N'  # select the symbol to be analyzed
data = pd.DataFrame(raw[symbol])
lags = 5
cols = list()
for lag in range(1, lags + 1):
    col = f'lag_{lag}'
    data[col] = data[symbol].shift(lag)  # lagged price level as explanatory variable
    cols.append(col)
data.dropna(inplace=True)
A practical example based on OLS regression (4).
reg = np.linalg.lstsq(data[cols], data[symbol], rcond=-1)[0]
reg
array([ 0.980752 , 0.03435725, -0.01860718, 0.00624259, -0.00272114])
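As a rough illustration of the point that, in a least-squares sense, today's price is (almost) the best predictor of tomorrow's price, the fitted coefficients can be compared against the naive lag-1 predictor. This comparison is a sketch added here, relying on the objects from the cells above; given the coefficients, the two error measures come out close.
pred_ols = np.dot(data[cols], reg)  # predictions from the fitted lag coefficients
pred_naive = data['lag_1']          # naive benchmark: tomorrow's price = today's price

mse_ols = ((data[symbol] - pred_ols) ** 2).mean()
mse_naive = ((data[symbol] - pred_naive) ** 2).mean()
print(mse_ols, mse_naive)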
Remark: The use of lagged prices violates a major assumption of OLS regression, namely that the explanatory variables should not be highly correlated with one another (multicollinearity).
data[cols].corr()
|       | lag_1    | lag_2    | lag_3    | lag_4    | lag_5    |
|-------|----------|----------|----------|----------|----------|
| lag_1 | 1.000000 | 0.997883 | 0.995849 | 0.993748 | 0.991664 |
| lag_2 | 0.997883 | 1.000000 | 0.997880 | 0.995847 | 0.993742 |
| lag_3 | 0.995849 | 0.997880 | 1.000000 | 0.997879 | 0.995844 |
| lag_4 | 0.993748 | 0.995847 | 0.997879 | 1.000000 | 0.997876 |
| lag_5 | 0.991664 | 0.993742 | 0.995844 | 0.997876 | 1.000000 |
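A minimal sketch of how the degree of multicollinearity could be quantified, for instance via the condition number of the lag matrix; this check is an illustration added here.
# ratio of the largest to the smallest singular value of the lag matrix;
# values far above ~30 are commonly read as a sign of strong multicollinearity
print(np.linalg.cond(data[cols].values))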
Robin Wigglesworth: "A theory of (almost) everything for financial markets." Financial Times, 29. December 2020.
It is true that the indices that passive funds track have over time morphed from being supposedly neutral snapshots of markets into something that actually exerts power over them, thanks to the growth of passive investing.
Mr Green argues that this helps explain why active managers are actually seeing their performance worsen as passive investing grows. The more money index funds garner, the better their holdings do in exact proportion to their weighting, and the harder it is for traditional discretionary investors to keep up.
Robin Wigglesworth: "A theory of (almost) everything for financial markets." Financial Times, 29. December 2020.
Sabine Hossenfelder (2018): Lost in Math — How Beauty Leads Physics Astray.
They were so sure, they bet billions on it. For decades physicists told us they knew where the next discoveries were waiting. ... The experiments didn't reveal anything new.
What failed physicists wasn't their math; it was their choice of math. They believed that Mother Nature was elegant, simple, and kind about providing clues. They thought they could hear her whisper when they were talking to themselves. Now Nature spoke, and she said nothing, loud and clear.
Eugene F. Fama and Kenneth R. French (2004): "The Capital Asset Pricing Model: Theory and Evidence."
The attraction of the CAPM is that it offers powerful and intuitively pleasing predictions about how to measure risk and the relation between expected return and risk. Unfortunately, the empirical record of the model is poor—poor enough to invalidate the way it is used in applications. The CAPM’s empirical problems may reflect theoretical failings, the result of many simplifying assumptions. But they may also be caused by difficulties in implementing valid tests of the model.
The version of the CAPM developed by Sharpe (1964) and Lintner (1965) has never been an empirical success. ... The problems are serious enough to invalidate most applications of the CAPM.
Alon Halevy et al. (2009): "The Unreasonable Effectiveness of Data."
Eugene Wigner’s article "The Unreasonable Effectiveness of Mathematics in the Natural Sciences" examines why so much of physics can be neatly explained with simple mathematical formulas such as $f = ma$ or $e = mc^2$. Meanwhile, sciences that involve human beings rather than elementary particles have proven more resistant to elegant mathematics. Economists suffer from physics envy over their inability to neatly model human behavior. An informal, incomplete grammar of the English language runs over 1,700 pages. Perhaps when it comes to natural language processing and related fields, we’re doomed to complex theories that will never have the elegance of physics equations. But if that’s so, we should stop acting as if our goal is to author extremely elegant theories, and instead embrace complexity and make use of the best ally we have: the unreasonable effectiveness of data.
Big Financial Data + Artificial Intelligence = AI-First Finance.
Assume that there are (persistent) patterns in financial markets that, when identified, allow one to predict future price movements. To simplify things, assume binary features and labels only.
symbol = 'RANDOM'
symbol = 'GS.N'  # select the symbol to be analyzed

def create_features(lags):
    data = pd.DataFrame(raw[symbol])
    data['r'] = np.log(data[symbol] / data[symbol].shift(1))  # log returns
    data['d'] = np.sign(data['r'])  # direction of the price change
    cols = list()
    for lag in range(1, lags + 1):
        col = f'lag_{lag}'
        data[col] = data['d'].shift(lag)  # lagged directions as features
        cols.append(col)
    data.dropna(inplace=True)
    data[['d'] + cols] = data[['d'] + cols].astype(int)
    return data, cols
We now have a simple classification problem for which we can train appropriate models through supervised learning.
lags = 2
data, cols = create_features(lags)
2 ** lags  # number of patterns
4
data.tail()
| Date       | GS.N   | r         | d  | lag_1 | lag_2 |
|------------|--------|-----------|----|-------|-------|
| 2019-12-24 | 229.91 | 0.003573  | 1  | 1     | -1    |
| 2019-12-26 | 231.21 | 0.005638  | 1  | 1     | 1     |
| 2019-12-27 | 230.66 | -0.002382 | -1 | 1     | 1     |
| 2019-12-30 | 229.80 | -0.003735 | -1 | -1    | 1     |
| 2019-12-31 | 229.93 | 0.000566  | 1  | -1    | -1    |
Let's divide the data into training and testing data.
split = int(0.8 * len(data))
train = data.iloc[:split]
test = data.iloc[split:]
One can simply use a frequentist approach to the prediction problem (1).
import copy
sel = copy.deepcopy(cols)
sel.append('d')  # group by the lag pattern plus the resulting direction
grouped = train.groupby(sel)
res = grouped['d'].size().unstack(fill_value=0)
# relative frequency of an upward movement, given the pattern
res['perc'] = res[1] / res.sum(axis=1)
One can simply use a frequentist approach to the prediction problem (2).
res  # perc = relative frequency of an upward movement
| lag_1 | lag_2 | d = -1 | d = 0 | d = 1 | perc     |
|-------|-------|--------|-------|-------|----------|
| -1    | -1    | 212    | 0     | 237   | 0.527840 |
| -1    | 1     | 237    | 0     | 283   | 0.544231 |
| 0     | 1     | 0      | 0     | 2     | 1.000000 |
| 1     | -1    | 262    | 1     | 257   | 0.494231 |
| 1     | 0     | 1      | 0     | 1     | 0.500000 |
| 1     | 1     | 257    | 1     | 259   | 0.500967 |
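A minimal sketch of how these frequencies could be turned into predictions for the test data and scored with the accuracy metric; this evaluation step is an illustration added here, assuming the objects from the cells above.
from sklearn.metrics import accuracy_score

# lookup table: lag pattern -> estimated probability of an upward movement
freq = res['perc'].to_dict()

# predict "up" (= 1) if the pattern's up-frequency exceeds 50%, else "down" (= -1);
# patterns not seen in the training data default to 0.5
probs = np.array([freq.get(tuple(p), 0.5) for p in test[cols].values])
prediction = np.where(probs > 0.5, 1, -1)

print(accuracy_score(test['d'], prediction))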
Let's use more lags and thereby more patterns.
lags = 5
data, cols = create_features(lags)
2 ** lags  # number of patterns
32
train = data.iloc[:split]
test = data.iloc[split:]
Any classifier can be applied to learn about patterns with predictive power (1).
from sklearn.tree import DecisionTreeClassifier
from sklearn.metrics import accuracy_score
model = DecisionTreeClassifier(max_depth=3)
model.fit(train[cols], train['d'])
DecisionTreeClassifier(max_depth=3)
accuracy_score(train['d'], model.predict(train[cols])) # in-sample
0.554228855721393
accuracy_score(test['d'], model.predict(test[cols])) # out-of-sample
0.48
Any classifier can be applied to learn about patterns with predictive power (2).
from sklearn.svm import SVC
model = SVC()
model.fit(train[cols], train['d'])
SVC()
accuracy_score(train['d'], model.predict(train[cols])) # in-sample
0.5681592039800994
accuracy_score(test['d'], model.predict(test[cols])) # out-of-sample
0.478
Any classifier can be applied to learn about patterns with predictive power (3).
from sklearn.neural_network import MLPClassifier
model = MLPClassifier(shuffle=False)
model.fit(train[cols], train['d'])
MLPClassifier(shuffle=False)
accuracy_score(train['d'], model.predict(train[cols])) # in-sample
0.5502487562189055
accuracy_score(test['d'], model.predict(test[cols])) # out-of-sample
0.462
Let's summarize: