'
Exercises
Dr. Yves J. Hilpisch
The Python Quants GmbH
PyData, New York City – 08. November 2013
As an exercise, we want to implement functions that calculate the sum of the square roots of the (positive) numbers in an array.
I = 1000000
a_py = range(I)
def f_py(a):
    """Return the sum of the square roots of the numbers in ``a``.

    Pure-Python reference implementation: iterates over ``a`` and
    accumulates ``x ** 0.5`` one element at a time.

    Parameters:
        a: iterable of numbers.

    Returns:
        The accumulated sum; the integer ``0`` when ``a`` is empty.
    """
    res = 0
    for x in a:
        res += x ** 0.5
    return res
%time f_py(a_py)
CPU times: user 308 ms, sys: 93.6 ms, total: 402 ms Wall time: 314 ms
666666166.4588418
import numpy as np
a_np = np.arange(I)
def f_np(a):
    """Return the sum of the square roots of the elements of ``a``.

    Vectorized NumPy implementation.

    Parameters:
        a: NumPy array, or any sequence ``np.asarray`` can convert.

    Returns:
        The sum as a NumPy scalar.
    """
    # Reduce with NumPy itself: the built-in ``sum`` would walk the array
    # element by element in the interpreter and give away the benefit of
    # the vectorized ``** 0.5``.
    return np.sum(np.asarray(a) ** 0.5)
%time f_np(a_np)
CPU times: user 42.5 ms, sys: 5.05 ms, total: 47.6 ms Wall time: 47 ms
666666166.4588418
import numexpr as ne
def f_ne(a):
    """Return the sum of the square roots of ``a``, evaluated by numexpr.

    Parameters:
        a: array operand referenced as ``a`` inside the expression string.

    Returns:
        Whatever ``ne.evaluate`` yields for the reduction (the recorded
        notebook output shows a zero-dimensional array).
    """
    ex = 'sum(a ** 0.5)'
    # Hand the operand over explicitly instead of relying on numexpr
    # looking up the name ``a`` in the calling frame.
    return ne.evaluate(ex, local_dict={'a': a})
%time f_ne(a_np)
CPU times: user 13 ms, sys: 690 µs, total: 13.7 ms Wall time: 12.9 ms
array(666666166.4588418)
from perf_comp_data import perf_comp_data
func_list = ['f_py', 'f_np', 'f_ne']
data_list = ['a_py', 'a_np', 'a_np']
perf_comp_data(func_list, data_list, rep=3)
function: f_ne, av. time sec: 0.01169, relative: 1.0 function: f_np, av. time sec: 0.04157, relative: 3.6 function: f_py, av. time sec: 0.19372, relative: 16.6
We want to generate a larger set of (pseudo-)random numbers to do the following:
The data should be of shape 2,500,000 rows with 6 numbers each.
from random import gauss
I, J = 2500000, 6
def gen_rows(n_rows=None, n_cols=None):
    """Generate rows of standard normally distributed pseudo-random numbers.

    Parameters:
        n_rows: number of rows to generate; defaults to the module-level ``I``.
        n_cols: number of values per row; defaults to the module-level ``J``.

    Returns:
        A list of ``n_rows`` lists, each holding ``n_cols`` floats drawn
        with ``gauss(0, 1)``.
    """
    # Resolve the defaults at call time so that calling ``gen_rows()``
    # without arguments keeps using the current values of ``I`` and ``J``.
    if n_rows is None:
        n_rows = I
    if n_cols is None:
        n_cols = J
    return [[gauss(0, 1) for _ in range(n_cols)] for _ in range(n_rows)]
%time rows = gen_rows()
CPU times: user 22.5 s, sys: 525 ms, total: 23 s Wall time: 22.9 s
import csv
fi = open('data.csv', 'w')
csv_fi = csv.writer(fi)
csv_fi.writerow(['no1', 'no2', 'no3', 'no4', 'no5', 'no6'])
%time csv_fi.writerows(rows)
CPU times: user 24.5 s, sys: 534 ms, total: 25 s Wall time: 25.2 s
fi.close()
ll dat*
-rw-r--r-- 1 yhilpisch staff 296963744 8 Nov 14:29 data.csv
import sys

# Show the header line and the first two data rows of the CSV file.
# The ``with`` block closes the file even if reading fails.
with open('data.csv', 'r') as fi:
    for k in range(3):
        # readline() keeps the trailing newline, so the text is written
        # unchanged; sys.stdout.write works on Python 2 and Python 3 alike.
        sys.stdout.write(fi.readline())
no1,no2,no3,no4,no5,no6 0.1644190526862174,0.4854525741415644,-0.8508669414572848,0.010516873383551673,-0.6460384655398392,-0.8541009269273129 -2.5892970908529787,1.3031672069809515,-0.43625524948867084,-0.8328548834797471,-0.37074691782996283,-0.31223264709143794
fi = open('data.csv', 'r')
csv_fi = csv.reader(fi)
def read_csv(reader=None):
    """Collect all remaining rows of a CSV reader in a list.

    Parameters:
        reader: iterable yielding one row per iteration; defaults to the
            module-level ``csv_fi`` reader.

    Returns:
        A list with one entry per row, in the order the reader yields them.
    """
    # Resolve the default at call time so that ``read_csv()`` keeps
    # consuming whatever ``csv_fi`` currently refers to.
    if reader is None:
        reader = csv_fi
    return list(reader)
%time data = read_csv()
CPU times: user 7.7 s, sys: 1.83 s, total: 9.53 s Wall time: 10.3 s
data = 0.0; rows = 0.0
pandas does not bring too much performance improvement but a significant improvement in convenience.
import pandas as pd
%time rows = pd.DataFrame(np.random.standard_normal((I, J)))
CPU times: user 672 ms, sys: 71.3 ms, total: 744 ms Wall time: 745 ms
%time rows.to_csv('data.csp')
CPU times: user 27.7 s, sys: 603 ms, total: 28.3 s Wall time: 28.4 s
%time pd.read_csv('data.csv')
CPU times: user 3.3 s, sys: 299 ms, total: 3.6 s Wall time: 3.61 s
<class 'pandas.core.frame.DataFrame'> Int64Index: 2500000 entries, 0 to 2499999 Columns: 6 entries, no1 to no6 dtypes: float64(6)
ll dat*
-rw-r--r-- 1 yhilpisch staff 313359543 8 Nov 14:30 data.csp -rw-r--r-- 1 yhilpisch staff 296963744 8 Nov 14:29 data.csv
rows = 0.0
filename = 'data'
import os
# Best-effort cleanup of the files written by the benchmarks above.
# Each file is removed independently, so a missing '.csv' file no longer
# prevents the '.csp' file from being deleted.
for ext in ('.csv', '.csp'):
    try:
        os.remove(filename + ext)
    except OSError:
        # The file is absent or cannot be removed; carry on with the next
        # one. Anything other than an OS-level error is no longer hidden.
        pass
We consider a situation where we have to calculate the following sum:
\[result = \sum_{i=1}^{I} \sum_{j=1}^{I \cdot J} \cos (i \cdot j - i \cdot j)\]
This can be done via a nested loop of the following form:
from math import cos
def count_sum_py(I, J):
    """Evaluate the double sum of cos(i * j - i * j) with nested loops.

    The outer index ``i`` runs from 1 to ``I`` and the inner index ``j``
    from 1 to ``I * J``. The argument of ``cos`` is always zero, so every
    term contributes 1.0; the function is a loop benchmark.

    Parameters:
        I: upper bound of the outer loop.
        J: factor that, multiplied by ``I``, bounds the inner loop.

    Returns:
        The accumulated sum; the integer ``0`` when no iteration runs.
    """
    # Kept as plain explicit loops on purpose: this function is also
    # handed to numba for compilation.
    res = 0
    for i in range(1, I + 1):
        for j in range(1, (I * J) + 1):
            res += cos(i * j - i * j)
    return res
I, J = 1000, 150
%time count_sum_py(I, J)
CPU times: user 34.9 s, sys: 66 ms, total: 35 s Wall time: 35 s
150000000.0
import numba as nb
# ``autojit`` only exists in early Numba releases; later ones provide the
# same lazy compilation through ``jit`` called without a signature, so use
# whichever decorator the installed version offers.
count_sum_nb = getattr(nb, 'autojit', nb.jit)(count_sum_py)
%time count_sum_nb(I, J)
DEBUG -- translate:361:translate ; ModuleID = 'tmp.module.__main__.count_sum_py.10e8507d0' @PyArray_API = linkonce_odr global i8** inttoptr (i64 4348295872 to i8**) define double @__numba_specialized_0___main___2E_count_sum_py(i32 %I, i32 %J) { entry: %nsteps2 = alloca i64 %target_temp1 = alloca i64 %nsteps = alloca i64 %target_temp = alloca i64 %return_value = alloca double store i64 1, i64* %target_temp, !tbaa !2 %0 = add i32 %I, 1 %1 = sext i32 %0 to i64 store i64 %1, i64* %nsteps, !tbaa !3 br label %"for_condition_4:13" cleanup_label: ; preds = %"exit_for_4:4", %error_label %2 = load double* %return_value ret double %2 error_label: ; No predecessors! store double 0x7FF8000000000000, double* %return_value br label %cleanup_label "for_condition_4:13": ; preds = %entry, %"exit_for_5:8" %res_2 = phi double [ 0.000000e+00, %entry ], [ %res_3, %"exit_for_5:8" ] %j_1 = phi i64 [ 123456789, %entry ], [ %j_2, %"exit_for_5:8" ] %3 = load i64* %target_temp, !tbaa !2 %4 = load i64* %nsteps, !tbaa !3 %5 = icmp slt i64 %3, %4 %6 = icmp ne i1 %5, false br i1 %6, label %"loop_body_5:8", label %"exit_for_4:4" "exit_for_4:4": ; preds = %"for_condition_4:13" store double %res_2, double* %return_value br label %cleanup_label "loop_body_5:8": ; preds = %"for_condition_4:13" %7 = load i64* %target_temp, !tbaa !2 %8 = load i64* %target_temp, !tbaa !2 %9 = add i64 %8, 1 store i64 %9, i64* %target_temp, !tbaa !2 store i64 1, i64* %target_temp1, !tbaa !4 %10 = mul i32 %I, %J %11 = add i32 %10, 1 %12 = sext i32 %11 to i64 store i64 %12, i64* %nsteps2, !tbaa !5 br label %"for_condition_5:17" "for_condition_5:17": ; preds = %"loop_body_5:8", %"loop_body_6:19" %res_3 = phi double [ %res_2, %"loop_body_5:8" ], [ %25, %"loop_body_6:19" ] %j_2 = phi i64 [ %j_1, %"loop_body_5:8" ], [ %17, %"loop_body_6:19" ] %13 = load i64* %target_temp1, !tbaa !4 %14 = load i64* %nsteps2, !tbaa !5 %15 = icmp slt i64 %13, %14 %16 = icmp ne i1 %15, false br i1 %16, label %"loop_body_6:19", label %"exit_for_5:8" 
"exit_for_5:8": ; preds = %"for_condition_5:17" br label %"for_condition_4:13" "loop_body_6:19": ; preds = %"for_condition_5:17" %17 = load i64* %target_temp1, !tbaa !4 %18 = load i64* %target_temp1, !tbaa !4 %19 = add i64 %18, 1 store i64 %19, i64* %target_temp1, !tbaa !4 %20 = mul i64 %7, %17 %21 = mul i64 %7, %17 %22 = sub i64 %20, %21 %23 = sitofp i64 %22 to double %24 = call double @"numba.math.['double'].cos"(double %23) %25 = fadd double %res_3, %24 br label %"for_condition_5:17" } declare { i64, i8* }* @Py_BuildValue(i8*, ...) declare i32 @PyArg_ParseTuple({ i64, i8* }*, i8*, ...) declare void @PyErr_Clear() declare double @"numba.math.['double'].cos"(double) !tbaa = !{!0, !1, !2, !3, !4, !5} !0 = metadata !{metadata !"root"} !1 = metadata !{metadata !"char *", metadata !0} !2 = metadata !{metadata !"unique0", metadata !1} !3 = metadata !{metadata !"unique1", metadata !1} !4 = metadata !{metadata !"unique2", metadata !1} !5 = metadata !{metadata !"unique3", metadata !1} DEBUG -- translate:361:translate ; ModuleID = 'numba_executable_module' @PyArray_API = linkonce_odr global i8** inttoptr (i64 4348295872 to i8**) define void @Py_INCREF({ i64, i8* }* %obj) { decl: %obj1 = alloca { i64, i8* }* store { i64, i8* }* %obj, { i64, i8* }** %obj1 %0 = bitcast { i64, i8* }* %obj to i64* %1 = load i64* %0 %2 = add i64 %1, 1 store i64 %2, i64* %0 ret void } define void @Py_DECREF({ i64, i8* }* %obj) { decl: %obj1 = alloca { i64, i8* }* store { i64, i8* }* %obj, { i64, i8* }** %obj1 %0 = bitcast { i64, i8* }* %obj to i64* %1 = load i64* %0 %2 = icmp sgt i64 %1, 1 br i1 %2, label %if.then, label %if.else if.then: ; preds = %decl %3 = add i64 %1, -1 store i64 %3, i64* %0 br label %if.end if.else: ; preds = %decl call void @Py_DecRef({ i64, i8* }* %obj) br label %if.end if.end: ; preds = %if.else, %if.then ret void } declare void @Py_DecRef({ i64, i8* }*) define void @Py_XINCREF({ i64, i8* }* %obj) { decl: %obj1 = alloca { i64, i8* }* store { i64, i8* }* %obj, { i64, i8* 
}** %obj1 %0 = ptrtoint { i64, i8* }* %obj to i64 %1 = icmp ne i64 %0, 0 br i1 %1, label %if.then, label %if.end if.then: ; preds = %decl %2 = bitcast { i64, i8* }* %obj to i64* %3 = load i64* %2 %4 = add i64 %3, 1 store i64 %4, i64* %2 br label %if.end if.end: ; preds = %if.then, %decl ret void } define void @Py_XDECREF({ i64, i8* }* %obj) { decl: %obj1 = alloca { i64, i8* }* store { i64, i8* }* %obj, { i64, i8* }** %obj1 %0 = ptrtoint { i64, i8* }* %obj to i64 %1 = icmp ne i64 %0, 0 br i1 %1, label %if.then, label %if.end if.then: ; preds = %decl call void @Py_DECREF({ i64, i8* }* %obj) br label %if.end if.end: ; preds = %if.then, %decl ret void } define i8* @IndexAxis(i8* %data, i64* %in_shape, i64* %in_strides, i64 %src_dim, i64 %index) { decl: %data1 = alloca i8* %in_shape2 = alloca i64* %in_strides3 = alloca i64* %src_dim4 = alloca i64 %index5 = alloca i64 %result = alloca i8* store i8* %data, i8** %data1 store i64* %in_shape, i64** %in_shape2 store i64* %in_strides, i64** %in_strides3 store i64 %src_dim, i64* %src_dim4 store i64 %index, i64* %index5 %0 = load i64** %in_strides3 %1 = load i64* %src_dim4 %2 = getelementptr inbounds i64* %0, i64 %1 %3 = load i64* %2 %4 = mul i64 %3, %index %5 = load i8** %data1 %6 = getelementptr inbounds i8* %5, i64 %4 store i8* %6, i8** %result ret i8* %6 } define void @NewAxis(i64* %out_shape, i64* %out_strides, i32 %dst_dim) { decl: %out_shape1 = alloca i64* %out_strides2 = alloca i64* %dst_dim3 = alloca i32 store i64* %out_shape, i64** %out_shape1 store i64* %out_strides, i64** %out_strides2 store i32 %dst_dim, i32* %dst_dim3 %0 = load i64** %out_shape1 %1 = getelementptr inbounds i64* %0, i32 %dst_dim store i64 1, i64* %1 %2 = load i64** %out_strides2 %3 = load i32* %dst_dim3 %4 = getelementptr inbounds i64* %2, i32 %3 store i64 0, i64* %4 ret void } define i32 @Broadcast(i64* %dst_shape, i64* %src_shape, i64* %src_strides, i32 %max_ndim, i32 %ndim) { decl: %dst_shape1 = alloca i64* %src_shape2 = alloca i64* %src_strides3 
= alloca i64* %max_ndim4 = alloca i32 %ndim5 = alloca i32 %0 = alloca i32 store i64* %dst_shape, i64** %dst_shape1 store i64* %src_shape, i64** %src_shape2 store i64* %src_strides, i64** %src_strides3 store i32 %max_ndim, i32* %max_ndim4 store i32 %ndim, i32* %ndim5 %1 = load i32* %max_ndim4 %2 = sub i32 %1, %ndim store i32 0, i32* %0 br label %loop.cond loop.cond: ; preds = %if.end11, %decl %3 = load i32* %0 %4 = load i32* %ndim5 %5 = icmp slt i32 %3, %4 br i1 %5, label %loop.body, label %loop.end loop.body: ; preds = %loop.cond %6 = load i64** %src_shape2 %7 = getelementptr inbounds i64* %6, i32 %3 %8 = add i32 %3, %2 %9 = load i64** %dst_shape1 %10 = getelementptr inbounds i64* %9, i32 %8 %11 = load i64* %7 %12 = icmp eq i64 %11, 1 br i1 %12, label %if.then, label %if.else loop.end: ; preds = %if.else7, %loop.cond %merge = phi i32 [ 1, %loop.cond ], [ 0, %if.else7 ] ret i32 %merge if.then: ; preds = %loop.body %13 = load i64** %src_strides3 %14 = getelementptr inbounds i64* %13, i32 %3 store i64 0, i64* %14 br label %if.end11 if.else: ; preds = %loop.body %15 = load i64* %10 %16 = icmp eq i64 %15, 1 br i1 %16, label %if.then6, label %if.else7 if.then6: ; preds = %if.else store i64 %11, i64* %10 br label %if.end11 if.else7: ; preds = %if.else %17 = icmp ne i64 %11, %15 br i1 %17, label %loop.end, label %if.end11 if.end11: ; preds = %if.else7, %if.then6, %if.then %18 = load i32* %0 %19 = add i32 %18, 1 store i32 %19, i32* %0 br label %loop.cond } define double @__numba_specialized_0___main___2E_count_sum_py(i32 %I, i32 %J) { entry: %0 = add i32 %I, 1 %1 = icmp sgt i32 %0, 1 br i1 %1, label %"loop_body_5:8.lr.ph", label %"exit_for_4:4" "loop_body_5:8.lr.ph": ; preds = %entry %2 = mul i32 %J, %I %3 = add i32 %2, 1 %4 = icmp sgt i32 %3, 1 br i1 %4, label %"loop_body_5:8.lr.ph.split.us", label %"loop_body_5:8.lr.ph.loop_body_5:8.lr.ph.split_crit_edge" "loop_body_5:8.lr.ph.loop_body_5:8.lr.ph.split_crit_edge": ; preds = %"loop_body_5:8.lr.ph" %5 = add i32 %I, 1 %6 = 
sext i32 %5 to i64 %7 = add i64 %6, -1 br label %"for_condition_4:13.loopexit" "loop_body_5:8.lr.ph.split.us": ; preds = %"loop_body_5:8.lr.ph" %8 = sext i32 %0 to i64 %9 = mul i32 %J, %I %10 = add i32 %9, 1 %11 = sext i32 %10 to i64 %12 = add i64 %11, -1 br label %"loop_body_6:19.lr.ph.us" "loop_body_6:19.us": ; preds = %"loop_body_6:19.us", %"loop_body_6:19.lr.ph.us" %lsr.iv = phi i64 [ %lsr.iv.next, %"loop_body_6:19.us" ], [ %12, %"loop_body_6:19.lr.ph.us" ] %res_35.us = phi double [ %res_26.us, %"loop_body_6:19.lr.ph.us" ], [ %14, %"loop_body_6:19.us" ] %13 = tail call double @"numba.math.['double'].cos"(double 0.000000e+00) %14 = fadd double %res_35.us, %13 %lsr.iv.next = add i64 %lsr.iv, -1 %exitcond8 = icmp eq i64 %lsr.iv.next, 0 br i1 %exitcond8, label %"for_condition_4:13.loopexit.us", label %"loop_body_6:19.us" "for_condition_4:13.loopexit.us": ; preds = %"loop_body_6:19.us" %exitcond9 = icmp eq i64 %16, %8 br i1 %exitcond9, label %"exit_for_4:4", label %"loop_body_6:19.lr.ph.us" "loop_body_6:19.lr.ph.us": ; preds = %"loop_body_5:8.lr.ph.split.us", %"for_condition_4:13.loopexit.us" %res_26.us = phi double [ 0.000000e+00, %"loop_body_5:8.lr.ph.split.us" ], [ %14, %"for_condition_4:13.loopexit.us" ] %15 = phi i64 [ 1, %"loop_body_5:8.lr.ph.split.us" ], [ %16, %"for_condition_4:13.loopexit.us" ] %16 = add i64 %15, 1 br label %"loop_body_6:19.us" "for_condition_4:13.loopexit": ; preds = %"loop_body_5:8.lr.ph.loop_body_5:8.lr.ph.split_crit_edge", %"for_condition_4:13.loopexit" %lsr.iv2 = phi i64 [ %7, %"loop_body_5:8.lr.ph.loop_body_5:8.lr.ph.split_crit_edge" ], [ %lsr.iv.next3, %"for_condition_4:13.loopexit" ] %lsr.iv.next3 = add i64 %lsr.iv2, -1 %exitcond = icmp eq i64 %lsr.iv.next3, 0 br i1 %exitcond, label %"exit_for_4:4", label %"for_condition_4:13.loopexit" "exit_for_4:4": ; preds = %"for_condition_4:13.loopexit", %"for_condition_4:13.loopexit.us", %entry %res_2.lcssa = phi double [ 0.000000e+00, %entry ], [ %14, %"for_condition_4:13.loopexit.us" ], [ 
0.000000e+00, %"for_condition_4:13.loopexit" ] ret double %res_2.lcssa } declare double @"numba.math.['double'].cos"(double) define { i64, i8* }* @__numba_specialized_1_numba_2E_codegen_2E_llvmwrapper_2E___numba_wrapper_count_sum_py(i8* %self, { i64, i8* }* %args) { entry: %objtemp = alloca { i64, i8* }* store { i64, i8* }* null, { i64, i8* }** %objtemp, !tbaa !6 %0 = alloca { i64, i8* }* %1 = alloca { i64, i8* }* %return_value = alloca { i64, i8* }* %2 = call i32 ({ i64, i8* }*, i8*, ...)* @PyArg_ParseTuple({ i64, i8* }* %args, i8* getelementptr inbounds ([3 x i8]* @__STR_0, i32 0, i32 0), { i64, i8* }** %1, { i64, i8* }** %0) %3 = icmp eq i32 %2, 0 br i1 %3, label %cleanup.if.true, label %cleanup.if.end cleanup_label: ; preds = %no_error, %error_label %4 = load { i64, i8* }** %objtemp, !tbaa !6 call void @Py_XDECREF({ i64, i8* }* %4) %5 = load { i64, i8* }** %return_value ret { i64, i8* }* %5 error_label: ; preds = %empty7, %empty8, %empty5, %empty2, %cleanup.if.true store { i64, i8* }* null, { i64, i8* }** %return_value %6 = load { i64, i8* }** %return_value, !tbaa !6 call void @Py_XINCREF({ i64, i8* }* %6) br label %cleanup_label cleanup.if.true: ; preds = %entry br label %error_label cleanup.if.end: ; preds = %entry %7 = load { i64, i8* }** %1 %8 = load { i64, i8* }** %0 %9 = call i32 inttoptr (i64 4461400210 to i32 ({ i64, i8* }*)*)({ i64, i8* }* %7) br label %empty empty: ; preds = %cleanup.if.end %10 = call i8* @PyErr_Occurred() %11 = ptrtoint i8* %10 to i64 %12 = icmp ne i64 %11, 0 br i1 %12, label %empty2, label %empty1 empty1: ; preds = %empty %13 = call i32 inttoptr (i64 4461400210 to i32 ({ i64, i8* }*)*)({ i64, i8* }* %8) br label %empty3 empty2: ; preds = %empty br label %error_label empty3: ; preds = %empty1 %14 = call i8* @PyErr_Occurred() %15 = ptrtoint i8* %14 to i64 %16 = icmp ne i64 %15, 0 br i1 %16, label %empty5, label %empty4 empty4: ; preds = %empty3 %17 = call double @__numba_specialized_0___main___2E_count_sum_py(i32 %9, i32 %13) br label 
%empty6 empty5: ; preds = %empty3 br label %error_label empty6: ; preds = %empty4 %18 = call i8* @PyErr_Occurred() %19 = ptrtoint i8* %18 to i64 %20 = icmp ne i64 %19, 0 br i1 %20, label %empty8, label %empty7 empty7: ; preds = %empty6 %21 = call { i64, i8* }* @PyFloat_FromDouble(double %17) store { i64, i8* }* %21, { i64, i8* }** %objtemp, !tbaa !6 %22 = ptrtoint { i64, i8* }* %21 to i64 %23 = icmp eq i64 %22, 0 br i1 %23, label %error_label, label %no_error empty8: ; preds = %empty6 br label %error_label no_error: ; preds = %empty7 %24 = load { i64, i8* }** %objtemp, !tbaa !6 store { i64, i8* }* %24, { i64, i8* }** %return_value %25 = load { i64, i8* }** %return_value, !tbaa !6 call void @Py_XINCREF({ i64, i8* }* %25) br label %cleanup_label } declare { i64, i8* }* @PyFloat_FromDouble(double) declare i8* @PyErr_Occurred() declare { i64, i8* }* @Py_BuildValue(i8*, ...) declare i32 @PyArg_ParseTuple({ i64, i8* }*, i8*, ...) declare void @PyErr_Clear() !tbaa = !{!0, !1, !2, !3, !4, !5, !0, !1, !6} !0 = metadata !{metadata !"root"} !1 = metadata !{metadata !"char *", metadata !0} !2 = metadata !{metadata !"unique0", metadata !1} !3 = metadata !{metadata !"unique1", metadata !1} !4 = metadata !{metadata !"unique2", metadata !1} !5 = metadata !{metadata !"unique3", metadata !1} !6 = metadata !{metadata !"object", metadata !1}
CPU times: user 1.24 s, sys: 40 ms, total: 1.28 s Wall time: 1.3 s
150000000.0
func_list = ['count_sum_py', 'count_sum_nb']
data_list = ['I, J', 'I, J']
perf_comp_data(func_list, data_list, rep=3)
function: count_sum_nb, av. time sec: 0.90074, relative: 1.0 function: count_sum_py, av. time sec: 34.02543, relative: 37.8
The Python Quants – the company Web site
Dr. Yves J. Hilpisch – my personal Web site
Derivatives Analytics with Python – my new book
Contact Us