
'
Exercises
Dr. Yves J. Hilpisch
The Python Quants GmbH
PyData, New York City – 08. November 2013
As an exercise, we want to implement functions that calculate the sum of the square roots of the (positive) numbers in an array.
I = 1000000
a_py = range(I)
def f_py(a):
    """Return the sum of the square roots of the numbers in ``a``.

    Pure-Python baseline: the iterable is walked one element at a time
    and ``value ** 0.5`` is accumulated, starting from the integer ``0``
    (which is also what an empty input returns).
    """
    total = 0
    for value in a:
        root = value ** 0.5
        total += root
    return total
%time f_py(a_py)
CPU times: user 308 ms, sys: 93.6 ms, total: 402 ms Wall time: 314 ms
666666166.4588418
import numpy as np
a_np = np.arange(I)
def f_np(a):
    """Return the sum of the square roots of the elements of ``a``.

    ``a`` is expected to be a NumPy array (it must support ``a ** 0.5``
    element-wise). The result is a NumPy floating-point scalar; an empty
    array yields ``0.0``.
    """
    # Call np.sum explicitly so the reduction runs inside NumPy no matter
    # which ``sum`` happens to be in scope; the builtin sum() would walk
    # the array in Python, one element at a time.
    return np.sum(a ** 0.5)
%time f_np(a_np)
CPU times: user 42.5 ms, sys: 5.05 ms, total: 47.6 ms Wall time: 47 ms
666666166.4588418
import numexpr as ne
def f_ne(a):
    """Sum the square roots of ``a`` by evaluating a numexpr expression.

    The computation is handed to ``ne.evaluate`` as a string; the value
    returned by that call is passed back unchanged.
    """
    # The expression refers to the parameter ``a`` by name; no explicit
    # variable mapping is passed to evaluate().
    return ne.evaluate('sum(a ** 0.5)')
%time f_ne(a_np)
CPU times: user 13 ms, sys: 690 µs, total: 13.7 ms Wall time: 12.9 ms
array(666666166.4588418)
from perf_comp_data import perf_comp_data
func_list = ['f_py', 'f_np', 'f_ne']
data_list = ['a_py', 'a_np', 'a_np']
perf_comp_data(func_list, data_list, rep=3)
function: f_ne, av. time sec: 0.01169, relative: 1.0 function: f_np, av. time sec: 0.04157, relative: 3.6 function: f_py, av. time sec: 0.19372, relative: 16.6
We want to generate a larger set of (pseudo-)random numbers to do the following:
The data should be of shape 2,500,000 rows with 6 numbers each.
from random import gauss
I, J = 2500000, 6
def gen_rows(n_rows=None, n_cols=None):
    """Generate rows of pseudo-random numbers drawn with ``gauss(0, 1)``.

    Parameters:
        n_rows: number of rows to generate; defaults to the module-level ``I``.
        n_cols: number of values per row; defaults to the module-level ``J``.

    Returns:
        A list holding ``n_rows`` lists of ``n_cols`` floats each.
    """
    # Fall back to the notebook-level I and J so a plain gen_rows() call
    # behaves as before. Passing the shape explicitly protects against
    # I and J being rebound elsewhere in the session.
    if n_rows is None:
        n_rows = I
    if n_cols is None:
        n_cols = J
    return [[gauss(0, 1) for _ in range(n_cols)] for _ in range(n_rows)]
%time rows = gen_rows()
CPU times: user 22.5 s, sys: 525 ms, total: 23 s Wall time: 22.9 s
import csv
fi = open('data.csv', 'w')
csv_fi = csv.writer(fi)
csv_fi.writerow(['no1', 'no2', 'no3', 'no4', 'no5', 'no6'])
%time csv_fi.writerows(rows)
CPU times: user 24.5 s, sys: 534 ms, total: 25 s Wall time: 25.2 s
fi.close()
ll dat*
-rw-r--r-- 1 yhilpisch staff 296963744 8 Nov 14:29 data.csv
fi = open('data.csv', 'r')
for k in range(3):
print fi.readline(),
fi.close()
no1,no2,no3,no4,no5,no6 0.1644190526862174,0.4854525741415644,-0.8508669414572848,0.010516873383551673,-0.6460384655398392,-0.8541009269273129 -2.5892970908529787,1.3031672069809515,-0.43625524948867084,-0.8328548834797471,-0.37074691782996283,-0.31223264709143794
def read_csv(filename='data.csv'):
    """Read a CSV file and return all of its rows.

    Parameters:
        filename: path of the file to read; defaults to 'data.csv'.

    Returns:
        A list with one entry per line of the file (the header line
        included), each entry being the list of string fields that
        ``csv.reader`` produced for that line.
    """
    # Open the file inside the function: the handle is always closed, and
    # every call starts from the top of the file instead of consuming a
    # shared, already-exhausted module-level reader.
    with open(filename, 'r') as fi:
        return list(csv.reader(fi))
%time data = read_csv()
CPU times: user 7.7 s, sys: 1.83 s, total: 9.53 s Wall time: 10.3 s
data = 0.0; rows = 0.0
pandas does not bring much of a performance improvement here, but it does bring a significant improvement in convenience.
import pandas as pd
%time rows = pd.DataFrame(np.random.standard_normal((I, J)))
CPU times: user 672 ms, sys: 71.3 ms, total: 744 ms Wall time: 745 ms
%time rows.to_csv('data.csp')
CPU times: user 27.7 s, sys: 603 ms, total: 28.3 s Wall time: 28.4 s
%time pd.read_csv('data.csv')
CPU times: user 3.3 s, sys: 299 ms, total: 3.6 s Wall time: 3.61 s
<class 'pandas.core.frame.DataFrame'> Int64Index: 2500000 entries, 0 to 2499999 Columns: 6 entries, no1 to no6 dtypes: float64(6)
ll dat*
-rw-r--r-- 1 yhilpisch staff 313359543 8 Nov 14:30 data.csp -rw-r--r-- 1 yhilpisch staff 296963744 8 Nov 14:29 data.csv
rows = 0.0
filename = 'data'
import os
# Best-effort cleanup of the generated data files. Each file is removed
# independently, so a missing '.csv' no longer prevents the '.csp' from
# being deleted. Only OSError (e.g. the file does not exist) is ignored;
# any other exception still surfaces.
for ext in ('.csv', '.csp'):
    try:
        os.remove(filename + ext)
    except OSError:
        pass
We consider a situation where we have to calculate the following sum:
\[result = \sum_{i=1}^{I} \sum_{j=1}^{I \cdot J} \cos (i \cdot j - i \cdot j)\]
This can be done via a nested loop of the following form:
from math import cos
def count_sum_py(I, J):
    """Evaluate the double sum of cos(i * j - i * j) with explicit loops.

    The outer index ``i`` runs from 1 to ``I`` and the inner index ``j``
    from 1 to ``I * J`` (both inclusive). The accumulator starts at the
    integer ``0``, which is returned unchanged when no iteration runs.
    """
    total = 0
    for i in range(1, I + 1):
        for j in range(1, (I * J) + 1):
            product = i * j
            total += cos(product - product)
    return total
I, J = 1000, 150
%time count_sum_py(I, J)
CPU times: user 34.9 s, sys: 66 ms, total: 35 s Wall time: 35 s
150000000.0
import numba as nb
count_sum_nb = nb.autojit(count_sum_py)
%time count_sum_nb(I, J)
DEBUG -- translate:361:translate
; ModuleID = 'tmp.module.__main__.count_sum_py.10e8507d0'
@PyArray_API = linkonce_odr global i8** inttoptr (i64 4348295872 to i8**)
define double @__numba_specialized_0___main___2E_count_sum_py(i32 %I, i32 %J) {
entry:
%nsteps2 = alloca i64
%target_temp1 = alloca i64
%nsteps = alloca i64
%target_temp = alloca i64
%return_value = alloca double
store i64 1, i64* %target_temp, !tbaa !2
%0 = add i32 %I, 1
%1 = sext i32 %0 to i64
store i64 %1, i64* %nsteps, !tbaa !3
br label %"for_condition_4:13"
cleanup_label: ; preds = %"exit_for_4:4", %error_label
%2 = load double* %return_value
ret double %2
error_label: ; No predecessors!
store double 0x7FF8000000000000, double* %return_value
br label %cleanup_label
"for_condition_4:13": ; preds = %entry, %"exit_for_5:8"
%res_2 = phi double [ 0.000000e+00, %entry ], [ %res_3, %"exit_for_5:8" ]
%j_1 = phi i64 [ 123456789, %entry ], [ %j_2, %"exit_for_5:8" ]
%3 = load i64* %target_temp, !tbaa !2
%4 = load i64* %nsteps, !tbaa !3
%5 = icmp slt i64 %3, %4
%6 = icmp ne i1 %5, false
br i1 %6, label %"loop_body_5:8", label %"exit_for_4:4"
"exit_for_4:4": ; preds = %"for_condition_4:13"
store double %res_2, double* %return_value
br label %cleanup_label
"loop_body_5:8": ; preds = %"for_condition_4:13"
%7 = load i64* %target_temp, !tbaa !2
%8 = load i64* %target_temp, !tbaa !2
%9 = add i64 %8, 1
store i64 %9, i64* %target_temp, !tbaa !2
store i64 1, i64* %target_temp1, !tbaa !4
%10 = mul i32 %I, %J
%11 = add i32 %10, 1
%12 = sext i32 %11 to i64
store i64 %12, i64* %nsteps2, !tbaa !5
br label %"for_condition_5:17"
"for_condition_5:17": ; preds = %"loop_body_5:8", %"loop_body_6:19"
%res_3 = phi double [ %res_2, %"loop_body_5:8" ], [ %25, %"loop_body_6:19" ]
%j_2 = phi i64 [ %j_1, %"loop_body_5:8" ], [ %17, %"loop_body_6:19" ]
%13 = load i64* %target_temp1, !tbaa !4
%14 = load i64* %nsteps2, !tbaa !5
%15 = icmp slt i64 %13, %14
%16 = icmp ne i1 %15, false
br i1 %16, label %"loop_body_6:19", label %"exit_for_5:8"
"exit_for_5:8": ; preds = %"for_condition_5:17"
br label %"for_condition_4:13"
"loop_body_6:19": ; preds = %"for_condition_5:17"
%17 = load i64* %target_temp1, !tbaa !4
%18 = load i64* %target_temp1, !tbaa !4
%19 = add i64 %18, 1
store i64 %19, i64* %target_temp1, !tbaa !4
%20 = mul i64 %7, %17
%21 = mul i64 %7, %17
%22 = sub i64 %20, %21
%23 = sitofp i64 %22 to double
%24 = call double @"numba.math.['double'].cos"(double %23)
%25 = fadd double %res_3, %24
br label %"for_condition_5:17"
}
declare { i64, i8* }* @Py_BuildValue(i8*, ...)
declare i32 @PyArg_ParseTuple({ i64, i8* }*, i8*, ...)
declare void @PyErr_Clear()
declare double @"numba.math.['double'].cos"(double)
!tbaa = !{!0, !1, !2, !3, !4, !5}
!0 = metadata !{metadata !"root"}
!1 = metadata !{metadata !"char *", metadata !0}
!2 = metadata !{metadata !"unique0", metadata !1}
!3 = metadata !{metadata !"unique1", metadata !1}
!4 = metadata !{metadata !"unique2", metadata !1}
!5 = metadata !{metadata !"unique3", metadata !1}
DEBUG -- translate:361:translate
; ModuleID = 'numba_executable_module'
@PyArray_API = linkonce_odr global i8** inttoptr (i64 4348295872 to i8**)
define void @Py_INCREF({ i64, i8* }* %obj) {
decl:
%obj1 = alloca { i64, i8* }*
store { i64, i8* }* %obj, { i64, i8* }** %obj1
%0 = bitcast { i64, i8* }* %obj to i64*
%1 = load i64* %0
%2 = add i64 %1, 1
store i64 %2, i64* %0
ret void
}
define void @Py_DECREF({ i64, i8* }* %obj) {
decl:
%obj1 = alloca { i64, i8* }*
store { i64, i8* }* %obj, { i64, i8* }** %obj1
%0 = bitcast { i64, i8* }* %obj to i64*
%1 = load i64* %0
%2 = icmp sgt i64 %1, 1
br i1 %2, label %if.then, label %if.else
if.then: ; preds = %decl
%3 = add i64 %1, -1
store i64 %3, i64* %0
br label %if.end
if.else: ; preds = %decl
call void @Py_DecRef({ i64, i8* }* %obj)
br label %if.end
if.end: ; preds = %if.else, %if.then
ret void
}
declare void @Py_DecRef({ i64, i8* }*)
define void @Py_XINCREF({ i64, i8* }* %obj) {
decl:
%obj1 = alloca { i64, i8* }*
store { i64, i8* }* %obj, { i64, i8* }** %obj1
%0 = ptrtoint { i64, i8* }* %obj to i64
%1 = icmp ne i64 %0, 0
br i1 %1, label %if.then, label %if.end
if.then: ; preds = %decl
%2 = bitcast { i64, i8* }* %obj to i64*
%3 = load i64* %2
%4 = add i64 %3, 1
store i64 %4, i64* %2
br label %if.end
if.end: ; preds = %if.then, %decl
ret void
}
define void @Py_XDECREF({ i64, i8* }* %obj) {
decl:
%obj1 = alloca { i64, i8* }*
store { i64, i8* }* %obj, { i64, i8* }** %obj1
%0 = ptrtoint { i64, i8* }* %obj to i64
%1 = icmp ne i64 %0, 0
br i1 %1, label %if.then, label %if.end
if.then: ; preds = %decl
call void @Py_DECREF({ i64, i8* }* %obj)
br label %if.end
if.end: ; preds = %if.then, %decl
ret void
}
define i8* @IndexAxis(i8* %data, i64* %in_shape, i64* %in_strides, i64 %src_dim, i64 %index) {
decl:
%data1 = alloca i8*
%in_shape2 = alloca i64*
%in_strides3 = alloca i64*
%src_dim4 = alloca i64
%index5 = alloca i64
%result = alloca i8*
store i8* %data, i8** %data1
store i64* %in_shape, i64** %in_shape2
store i64* %in_strides, i64** %in_strides3
store i64 %src_dim, i64* %src_dim4
store i64 %index, i64* %index5
%0 = load i64** %in_strides3
%1 = load i64* %src_dim4
%2 = getelementptr inbounds i64* %0, i64 %1
%3 = load i64* %2
%4 = mul i64 %3, %index
%5 = load i8** %data1
%6 = getelementptr inbounds i8* %5, i64 %4
store i8* %6, i8** %result
ret i8* %6
}
define void @NewAxis(i64* %out_shape, i64* %out_strides, i32 %dst_dim) {
decl:
%out_shape1 = alloca i64*
%out_strides2 = alloca i64*
%dst_dim3 = alloca i32
store i64* %out_shape, i64** %out_shape1
store i64* %out_strides, i64** %out_strides2
store i32 %dst_dim, i32* %dst_dim3
%0 = load i64** %out_shape1
%1 = getelementptr inbounds i64* %0, i32 %dst_dim
store i64 1, i64* %1
%2 = load i64** %out_strides2
%3 = load i32* %dst_dim3
%4 = getelementptr inbounds i64* %2, i32 %3
store i64 0, i64* %4
ret void
}
define i32 @Broadcast(i64* %dst_shape, i64* %src_shape, i64* %src_strides, i32 %max_ndim, i32 %ndim) {
decl:
%dst_shape1 = alloca i64*
%src_shape2 = alloca i64*
%src_strides3 = alloca i64*
%max_ndim4 = alloca i32
%ndim5 = alloca i32
%0 = alloca i32
store i64* %dst_shape, i64** %dst_shape1
store i64* %src_shape, i64** %src_shape2
store i64* %src_strides, i64** %src_strides3
store i32 %max_ndim, i32* %max_ndim4
store i32 %ndim, i32* %ndim5
%1 = load i32* %max_ndim4
%2 = sub i32 %1, %ndim
store i32 0, i32* %0
br label %loop.cond
loop.cond: ; preds = %if.end11, %decl
%3 = load i32* %0
%4 = load i32* %ndim5
%5 = icmp slt i32 %3, %4
br i1 %5, label %loop.body, label %loop.end
loop.body: ; preds = %loop.cond
%6 = load i64** %src_shape2
%7 = getelementptr inbounds i64* %6, i32 %3
%8 = add i32 %3, %2
%9 = load i64** %dst_shape1
%10 = getelementptr inbounds i64* %9, i32 %8
%11 = load i64* %7
%12 = icmp eq i64 %11, 1
br i1 %12, label %if.then, label %if.else
loop.end: ; preds = %if.else7, %loop.cond
%merge = phi i32 [ 1, %loop.cond ], [ 0, %if.else7 ]
ret i32 %merge
if.then: ; preds = %loop.body
%13 = load i64** %src_strides3
%14 = getelementptr inbounds i64* %13, i32 %3
store i64 0, i64* %14
br label %if.end11
if.else: ; preds = %loop.body
%15 = load i64* %10
%16 = icmp eq i64 %15, 1
br i1 %16, label %if.then6, label %if.else7
if.then6: ; preds = %if.else
store i64 %11, i64* %10
br label %if.end11
if.else7: ; preds = %if.else
%17 = icmp ne i64 %11, %15
br i1 %17, label %loop.end, label %if.end11
if.end11: ; preds = %if.else7, %if.then6, %if.then
%18 = load i32* %0
%19 = add i32 %18, 1
store i32 %19, i32* %0
br label %loop.cond
}
define double @__numba_specialized_0___main___2E_count_sum_py(i32 %I, i32 %J) {
entry:
%0 = add i32 %I, 1
%1 = icmp sgt i32 %0, 1
br i1 %1, label %"loop_body_5:8.lr.ph", label %"exit_for_4:4"
"loop_body_5:8.lr.ph": ; preds = %entry
%2 = mul i32 %J, %I
%3 = add i32 %2, 1
%4 = icmp sgt i32 %3, 1
br i1 %4, label %"loop_body_5:8.lr.ph.split.us", label %"loop_body_5:8.lr.ph.loop_body_5:8.lr.ph.split_crit_edge"
"loop_body_5:8.lr.ph.loop_body_5:8.lr.ph.split_crit_edge": ; preds = %"loop_body_5:8.lr.ph"
%5 = add i32 %I, 1
%6 = sext i32 %5 to i64
%7 = add i64 %6, -1
br label %"for_condition_4:13.loopexit"
"loop_body_5:8.lr.ph.split.us": ; preds = %"loop_body_5:8.lr.ph"
%8 = sext i32 %0 to i64
%9 = mul i32 %J, %I
%10 = add i32 %9, 1
%11 = sext i32 %10 to i64
%12 = add i64 %11, -1
br label %"loop_body_6:19.lr.ph.us"
"loop_body_6:19.us": ; preds = %"loop_body_6:19.us", %"loop_body_6:19.lr.ph.us"
%lsr.iv = phi i64 [ %lsr.iv.next, %"loop_body_6:19.us" ], [ %12, %"loop_body_6:19.lr.ph.us" ]
%res_35.us = phi double [ %res_26.us, %"loop_body_6:19.lr.ph.us" ], [ %14, %"loop_body_6:19.us" ]
%13 = tail call double @"numba.math.['double'].cos"(double 0.000000e+00)
%14 = fadd double %res_35.us, %13
%lsr.iv.next = add i64 %lsr.iv, -1
%exitcond8 = icmp eq i64 %lsr.iv.next, 0
br i1 %exitcond8, label %"for_condition_4:13.loopexit.us", label %"loop_body_6:19.us"
"for_condition_4:13.loopexit.us": ; preds = %"loop_body_6:19.us"
%exitcond9 = icmp eq i64 %16, %8
br i1 %exitcond9, label %"exit_for_4:4", label %"loop_body_6:19.lr.ph.us"
"loop_body_6:19.lr.ph.us": ; preds = %"loop_body_5:8.lr.ph.split.us", %"for_condition_4:13.loopexit.us"
%res_26.us = phi double [ 0.000000e+00, %"loop_body_5:8.lr.ph.split.us" ], [ %14, %"for_condition_4:13.loopexit.us" ]
%15 = phi i64 [ 1, %"loop_body_5:8.lr.ph.split.us" ], [ %16, %"for_condition_4:13.loopexit.us" ]
%16 = add i64 %15, 1
br label %"loop_body_6:19.us"
"for_condition_4:13.loopexit": ; preds = %"loop_body_5:8.lr.ph.loop_body_5:8.lr.ph.split_crit_edge", %"for_condition_4:13.loopexit"
%lsr.iv2 = phi i64 [ %7, %"loop_body_5:8.lr.ph.loop_body_5:8.lr.ph.split_crit_edge" ], [ %lsr.iv.next3, %"for_condition_4:13.loopexit" ]
%lsr.iv.next3 = add i64 %lsr.iv2, -1
%exitcond = icmp eq i64 %lsr.iv.next3, 0
br i1 %exitcond, label %"exit_for_4:4", label %"for_condition_4:13.loopexit"
"exit_for_4:4": ; preds = %"for_condition_4:13.loopexit", %"for_condition_4:13.loopexit.us", %entry
%res_2.lcssa = phi double [ 0.000000e+00, %entry ], [ %14, %"for_condition_4:13.loopexit.us" ], [ 0.000000e+00, %"for_condition_4:13.loopexit" ]
ret double %res_2.lcssa
}
declare double @"numba.math.['double'].cos"(double)
define { i64, i8* }* @__numba_specialized_1_numba_2E_codegen_2E_llvmwrapper_2E___numba_wrapper_count_sum_py(i8* %self, { i64, i8* }* %args) {
entry:
%objtemp = alloca { i64, i8* }*
store { i64, i8* }* null, { i64, i8* }** %objtemp, !tbaa !6
%0 = alloca { i64, i8* }*
%1 = alloca { i64, i8* }*
%return_value = alloca { i64, i8* }*
%2 = call i32 ({ i64, i8* }*, i8*, ...)* @PyArg_ParseTuple({ i64, i8* }* %args, i8* getelementptr inbounds ([3 x i8]* @__STR_0, i32 0, i32 0), { i64, i8* }** %1, { i64, i8* }** %0)
%3 = icmp eq i32 %2, 0
br i1 %3, label %cleanup.if.true, label %cleanup.if.end
cleanup_label: ; preds = %no_error, %error_label
%4 = load { i64, i8* }** %objtemp, !tbaa !6
call void @Py_XDECREF({ i64, i8* }* %4)
%5 = load { i64, i8* }** %return_value
ret { i64, i8* }* %5
error_label: ; preds = %empty7, %empty8, %empty5, %empty2, %cleanup.if.true
store { i64, i8* }* null, { i64, i8* }** %return_value
%6 = load { i64, i8* }** %return_value, !tbaa !6
call void @Py_XINCREF({ i64, i8* }* %6)
br label %cleanup_label
cleanup.if.true: ; preds = %entry
br label %error_label
cleanup.if.end: ; preds = %entry
%7 = load { i64, i8* }** %1
%8 = load { i64, i8* }** %0
%9 = call i32 inttoptr (i64 4461400210 to i32 ({ i64, i8* }*)*)({ i64, i8* }* %7)
br label %empty
empty: ; preds = %cleanup.if.end
%10 = call i8* @PyErr_Occurred()
%11 = ptrtoint i8* %10 to i64
%12 = icmp ne i64 %11, 0
br i1 %12, label %empty2, label %empty1
empty1: ; preds = %empty
%13 = call i32 inttoptr (i64 4461400210 to i32 ({ i64, i8* }*)*)({ i64, i8* }* %8)
br label %empty3
empty2: ; preds = %empty
br label %error_label
empty3: ; preds = %empty1
%14 = call i8* @PyErr_Occurred()
%15 = ptrtoint i8* %14 to i64
%16 = icmp ne i64 %15, 0
br i1 %16, label %empty5, label %empty4
empty4: ; preds = %empty3
%17 = call double @__numba_specialized_0___main___2E_count_sum_py(i32 %9, i32 %13)
br label %empty6
empty5: ; preds = %empty3
br label %error_label
empty6: ; preds = %empty4
%18 = call i8* @PyErr_Occurred()
%19 = ptrtoint i8* %18 to i64
%20 = icmp ne i64 %19, 0
br i1 %20, label %empty8, label %empty7
empty7: ; preds = %empty6
%21 = call { i64, i8* }* @PyFloat_FromDouble(double %17)
store { i64, i8* }* %21, { i64, i8* }** %objtemp, !tbaa !6
%22 = ptrtoint { i64, i8* }* %21 to i64
%23 = icmp eq i64 %22, 0
br i1 %23, label %error_label, label %no_error
empty8: ; preds = %empty6
br label %error_label
no_error: ; preds = %empty7
%24 = load { i64, i8* }** %objtemp, !tbaa !6
store { i64, i8* }* %24, { i64, i8* }** %return_value
%25 = load { i64, i8* }** %return_value, !tbaa !6
call void @Py_XINCREF({ i64, i8* }* %25)
br label %cleanup_label
}
declare { i64, i8* }* @PyFloat_FromDouble(double)
declare i8* @PyErr_Occurred()
declare { i64, i8* }* @Py_BuildValue(i8*, ...)
declare i32 @PyArg_ParseTuple({ i64, i8* }*, i8*, ...)
declare void @PyErr_Clear()
!tbaa = !{!0, !1, !2, !3, !4, !5, !0, !1, !6}
!0 = metadata !{metadata !"root"}
!1 = metadata !{metadata !"char *", metadata !0}
!2 = metadata !{metadata !"unique0", metadata !1}
!3 = metadata !{metadata !"unique1", metadata !1}
!4 = metadata !{metadata !"unique2", metadata !1}
!5 = metadata !{metadata !"unique3", metadata !1}
!6 = metadata !{metadata !"object", metadata !1}
CPU times: user 1.24 s, sys: 40 ms, total: 1.28 s Wall time: 1.3 s
150000000.0
func_list = ['count_sum_py', 'count_sum_nb']
data_list = ['I, J', 'I, J']
perf_comp_data(func_list, data_list, rep=3)
function: count_sum_nb, av. time sec: 0.90074, relative: 1.0 function: count_sum_py, av. time sec: 34.02543, relative: 37.8

The Python Quants – the company Web site
Dr. Yves J. Hilpisch – my personal Web site
Derivatives Analytics with Python – my new book
Contact Us