我正在编写一个 numba 函数来计算投资组合的波动率:
下面是我用来完成这项计算的几个函数:
import numba as nb
import numpy as np
def portfolio_s2(cv, weights):
    """Return the variance of a portfolio, i.e. the quadratic form w' * cv * w.

    Args:
        cv: Square covariance matrix (array-like, n x n).
        weights: Portfolio weights (array-like, length n).

    Returns:
        The scalar ``weights . cv . weights``.
    """
    # Coerce once so plain lists/tuples work as well as ndarrays.
    cv = np.asarray(cv)
    weights = np.asarray(weights)
    return weights.dot(cv).dot(weights)
@nb.jit(nopython=True)
def portfolio_s2c(cv, weights):
    """Return the portfolio variance w' * cv * w using explicit loops.

    Only the diagonal and the upper triangle of ``cv`` are read; each
    off-diagonal term is counted twice, so the result equals the full
    quadratic form only when ``cv`` is symmetric.

    Args:
        cv: Square 2-D array (n x n), indexed as ``cv[i, j]``.
        weights: 1-D array of length n.

    Returns:
        The accumulated variance as a float; 0.0 when ``weights`` is empty.
    """
    n = weights.shape[0]
    total = 0.0
    # A single loop covers every index: for the last row the inner range is
    # empty, so no special-case tail is needed and an empty ``weights`` never
    # triggers an out-of-bounds ``weights[-1]`` read.
    for i in range(n):
        wi = weights[i]
        total += cv[i, i] * wi * wi
        # Sum of w_j * cv[i, j] over the strict upper triangle of row i.
        upper = 0.0
        for j in range(i + 1, n):
            upper += weights[j] * cv[i, j]
        # wi is loop-invariant for the inner loop, so it is applied once here.
        total += 2.0 * wi * upper
    return total
@nb.jit(nopython=True)
def portfolio_s2b(cv, weights):
    """Return the portfolio variance w' * cv * w in two passes.

    The first pass sums the diagonal terms ``w_i^2 * cv[i, i]``; the second
    sums the strict upper triangle ``w_i * w_j * cv[i, j]`` (j > i), which is
    then doubled. The lower triangle of ``cv`` is never read, so the result
    equals the full quadratic form only when ``cv`` is symmetric.

    Args:
        cv: Square 2-D array (n x n), indexed as ``cv[i, j]``.
        weights: 1-D array of length n.

    Returns:
        ``diagonal_sum + 2.0 * upper_triangle_sum`` as a float.
    """
    n = weights.shape[0]

    # Pass 1: diagonal contribution.
    diag_sum = 0.0
    for i in range(n):
        diag_sum += weights[i] * weights[i] * cv[i, i]

    # Pass 2: strict upper triangle; the last row has no entries right of the
    # diagonal, hence ``n - 1``.
    upper_sum = 0.0
    for i in range(n - 1):
        row_sum = 0.0
        for j in range(i + 1, n):
            row_sum += weights[j] * cv[i, j]
        upper_sum += weights[i] * row_sum

    return diag_sum + 2.0 * upper_sum
我正在使用以下代码测试函数的性能:
# Benchmark: time the three portfolio-variance implementations over a range
# of covariance-matrix sizes and plot total time against matrix size.
N = 1000  # random observations drawn to estimate each covariance matrix
num_tests = 10000  # timed calls per implementation and matrix size
times_2b = []
times_2c = []
times_np = []
matrix_sizes = [2, 4, 8, 10, 20, 40, 80, 160]  # , 320, 640, 1280, 2560 ]

for m in matrix_sizes:
    # m x m sample covariance of N draws, with equal weights summing to 1.
    X = np.random.randn(N, m)
    cv = np.cov(X, rowvar=False)
    w = np.ones(cv.shape[0]) / cv.shape[0]

    # Sanity check that all implementations agree. These calls run before
    # any timing starts, so if the helpers are JIT-compiled on first use
    # (presumably the numba versions; confirm against `helpers`), that
    # one-off cost is not included in the measurements.
    s2 = helpers.portfolio_s2(cv, w)
    s2b = helpers.portfolio_s2b(cv, w)
    s2c = helpers.portfolio_s2c(cv, w)
    np.testing.assert_almost_equal(s2, s2b)
    np.testing.assert_almost_equal(s2, s2c)

    with Timer('nb2b') as t2b:
        for _ in range(num_tests):
            helpers.portfolio_s2b(cv, w)

    with Timer('nb2c') as t2c:
        for _ in range(num_tests):
            helpers.portfolio_s2c(cv, w)

    with Timer('np') as tnp:
        for _ in range(num_tests):
            helpers.portfolio_s2(cv, w)

    # Each entry is the total time for `num_tests` calls at this size.
    times_2b.append(t2b.timetaken)
    times_2c.append(t2c.timetaken)
    times_np.append(tnp.timetaken)

# One curve per implementation, legend placed outside the axes on the right.
plt.figure()
plt.plot(matrix_sizes, times_2b, label='2b')
plt.plot(matrix_sizes, times_2c, label='2c')
plt.plot(matrix_sizes, times_np, label='np')
plt.legend(loc='center left', bbox_to_anchor=(1, 0.5))
plt.show()
这是定时器类:
import time
class Timer:
    """Context manager that measures and prints the time spent in its block.

    After the ``with`` block exits, ``start``, ``end`` and ``timetaken``
    (seconds, as a float) are available on the instance. Before that they
    are ``None``.
    """

    def __init__(self, name=''):
        self._name = name
        self.start = None
        self.end = None
        self.timetaken = None

    def __enter__(self):
        # perf_counter is monotonic and high-resolution, so the measured
        # interval cannot go negative if the system clock is adjusted.
        self.start = time.perf_counter()
        return self

    def __exit__(self, a, b, c):
        self.end = time.perf_counter()
        self.timetaken = self.end - self.start
        print('{0} Took {1} seconds'.format(self._name, self.timetaken))
        # Returning None lets any exception raised inside the block propagate.
结果绘制在此处:
结果表明:
- 对于 80 以下的矩阵大小,该函数的 numba 版本优于 numpy 版本
- numba 版本的扩展性似乎比 numpy 函数差
为什么是这样?与调用 numba 相比,调用 numpy 是否存在某种开销?
为什么 numpy 函数可以更好地扩展?它是在后台使用 BLAS 做一些花哨的事情,还是使用高级算法进行计算?
我能否让 numba 函数的扩展性与 numpy 函数一样好?