就速度而言,对于固定的n,numpy.inner可能是最佳选择。numpy.trapz应该收敛得更快。不管怎样,如果你关心速度,还应该考虑到函数本身的求值也要花费一些时间。
下面是我用不同的内积实现运行的一个简单基准测试。
时间安排
下图显示了只计算积分的运行时间,即不包含函数求值。numpy.trapz慢一个常数因子,而numpy.inner与直接调用BLAS一样快。正如Ophion指出的,numpy.inner在内部调用BLAS,可能只是多了一些输入检查的开销。
同样值得关注的是函数本身求值所需的时间,而要计算内积就必须先完成这一步。下图显示了标准超越函数numpy.sin、numpy.sqrt和numpy.exp的求值时间。当然,乘积与求和的计算具有相同的缩放规律,所需的总时间也是可比的。
错误
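最后,我们还应该考虑不同方法的准确性,而这正是真正有趣的地方。下面是计算
$\langle \exp(x), \exp(x) \rangle = \frac{1}{\pi}\int_{-\pi}^{\pi} \exp(2x)\,\mathrm{d}x = \frac{e^{2\pi}-e^{-2\pi}}{2\pi}$
的不同实现的收敛图。在这里我们可以看到numpy.trapz的收敛性实际上比其他两个实现好得多,在我耗尽内存之前,它们甚至达不到机器精度。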
结论
考虑到numpy.inner的收敛性较差,我会选择numpy.trapz。但即便如此,仍需要大量的积分节点才能获得满意的精度。因为你的积分区间是固定的,你甚至可以尝试更高阶的求积公式。
代码
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sls
from scipy.linalg.blas import ddot
import timeit
## Define inner product.
def l2_inner_blas(f, g, dx):
    """Approximate the pi-scaled inner product of two sampled functions via BLAS.

    Every sample is weighted equally by ``dx`` (a plain Riemann-type sum),
    and the dot product is computed by calling BLAS ``ddot`` directly.

    Args:
        f: Samples of the first function.
        g: Samples of the second function, same length as ``f``.
        dx: Grid spacing used as the quadrature weight.

    Returns:
        ``ddot(f, g) * dx / pi``.
    """
    return ddot(f, g) * dx / np.pi
def l2_inner(f, g, dx):
    """Approximate the pi-scaled inner product of two sampled functions.

    Every sample is weighted equally by ``dx`` (a plain Riemann-type sum);
    the dot product itself is delegated to ``numpy.inner``.

    Args:
        f: Samples of the first function.
        g: Samples of the second function, same length as ``f``.
        dx: Grid spacing used as the quadrature weight.

    Returns:
        ``np.inner(f, g) * dx / pi``.
    """
    return np.inner(f, g) * dx / np.pi
def l2_inner_trapz(f, g, dx):
    """Approximate the pi-scaled inner product with the trapezoidal rule.

    The pointwise product ``f * g`` is integrated with spacing ``dx`` and the
    result is divided by pi.

    Args:
        f: Samples of the first function (array supporting ``*``).
        g: Samples of the second function, broadcast-compatible with ``f``.
        dx: Grid spacing between consecutive samples.

    Returns:
        The trapezoidal-rule integral of ``f * g`` divided by pi.
    """
    # NumPy 2.0 renamed ``trapz`` to ``trapezoid`` and deprecated the old
    # name; prefer the new one and fall back for older NumPy releases.
    integrate = getattr(np, "trapezoid", None)
    if integrate is None:
        integrate = np.trapz
    return integrate(f * g, dx=dx) / np.pi
def sin1(x):
    """Return ``np.sin(x)``; first test function for the benchmark."""
    return np.sin(x)


def sin2(x):
    """Return ``np.sin(2.0 * x)``; second test function for the benchmark."""
    return np.sin(2.0 * x)
## Timing setups.
# timeit setup template: ``%d`` is replaced by the number of grid points n.
# It imports the functions under test from __main__, builds the grid on
# [-pi, pi] and pre-evaluates both integrands so only the inner product is
# timed.
setup1 = (
    "import numpy as np; from __main__ import l2_inner,"
    "l2_inner_trapz, l2_inner_blas, sin1, sin2;"
    "n=%d; x=np.linspace(-np.pi,np.pi,n); dx=2.0*np.pi/(n-1);"
    "f=sin1(x); g=sin2(x);"
)
def time(n):
    """Time the three inner-product implementations on ``n`` grid points.

    The setup string (``setup1`` with ``n`` substituted) builds the grid and
    the sampled functions outside the timed statement, so only the inner
    product itself is measured.

    Args:
        n: Number of grid points substituted into ``setup1``.

    Returns:
        A 3-tuple of total wall times in seconds for 10 calls each of
        ``l2_inner``, ``l2_inner_blas`` and ``l2_inner_trapz``, in that order.
    """
    setupstr = setup1 % n
    statements = (
        'l2_inner( f, g, dx)',
        'l2_inner_blas( f, g, dx)',
        'l2_inner_trapz( f, g, dx)',
    )
    return tuple(
        timeit.timeit(stmt, setupstr, number=10) for stmt in statements
    )
# timeit setup template: ``%d`` is replaced by the number of grid points.
setup2 = "import numpy as np; x = np.linspace(-np.pi,np.pi,%d);"


def time_eval(n):
    """Time the evaluation of sin, sqrt and exp on ``n`` grid points.

    Args:
        n: Number of points of the grid on [-pi, pi] built by ``setup2``.

    Returns:
        A 3-tuple of total wall times in seconds for 10 evaluations each of
        ``np.sin(x)``, ``np.sqrt(x)`` and ``np.exp(x)``, in that order.
    """
    setupstr = setup2 % n
    # NOTE: the grid contains negative values, so np.sqrt yields NaN there
    # (and may emit a RuntimeWarning); only the run time is of interest.
    statements = ('np.sin(x)', 'np.sqrt(x)', 'np.exp(x)')
    return tuple(
        timeit.timeit(stmt, setupstr, number=10) for stmt in statements
    )
## Perform timing for vector product.
# Row i holds the three timings (inner, BLAS ddot, trapz) for n = 10**(i+1).
times = np.zeros((7, 3))
for i in range(7):
    times[i, :] = time(10**(i + 1))

# Problem sizes 10**1 .. 10**7, matching the rows of ``times``.
x = 10**np.arange(1, 8, 1)
f, ax = plt.subplots()
ax.set(xscale='log', yscale='log', title='Inner vs. BLAS vs. trapz',
       ylabel='time [s]', xlabel='n')
ax.plot(x, times[:, 0], label='numpy.inner')
ax.plot(x, times[:, 1], label='scipy.linalg.blas.ddot')
ax.plot(x, times[:, 2], label='numpy.trapz')
plt.legend()
## Perform timing for function evaluation.
# Row i holds the three timings (sin, sqrt, exp) for n = 10**(i+1).
times_eval = np.zeros((7, 3))
for i in range(7):
    times_eval[i, :] = time_eval(10**(i + 1))

# Problem sizes 10**1 .. 10**7, matching the rows of ``times_eval``.
x = 10**np.arange(1, 8, 1)
f, ax = plt.subplots()
ax.set(xscale='log', yscale='log', title='sin vs. sqrt vs. exp',
       ylabel='time [s]', xlabel='n')
ax.plot(x, times_eval[:, 0], label='numpy.sin')
ax.plot(x, times_eval[:, 1], label='numpy.sqrt')
ax.plot(x, times_eval[:, 2], label='numpy.exp')
plt.legend()
## Test convergence.
def error(n):
    """Return the relative errors of the three inner products for exp(x).

    The reference value is the exact integral
    ``(1/pi) * int_{-pi}^{pi} exp(2x) dx = (e^{2 pi} - e^{-2 pi}) / (2 pi)``.

    Args:
        n: Number of equispaced grid points on [-pi, pi]; must be at least 2
            so that the grid spacing is defined.

    Returns:
        A 3-tuple of relative errors for ``l2_inner``, ``l2_inner_blas`` and
        ``l2_inner_trapz``, in that order.

    Raises:
        ValueError: If ``n`` is smaller than 2.
    """
    if n < 2:
        # With a single point the spacing 2*pi/(n-1) is a division by zero.
        raise ValueError("n must be at least 2, got %r" % (n,))

    x = np.linspace(-np.pi, np.pi, n)
    dx = 2.0 * np.pi / (n - 1)
    f = np.exp(x)
    l2 = 0.5 / np.pi * (np.exp(2.0 * np.pi) - np.exp(-2.0 * np.pi))

    err1 = np.abs((l2 - l2_inner(f, f, dx)) / l2)
    err2 = np.abs((l2 - l2_inner_blas(f, f, dx)) / l2)
    err3 = np.abs((l2 - l2_inner_trapz(f, f, dx)) / l2)
    return (err1, err2, err3)
# Row i holds the three relative errors (inner, BLAS ddot, trapz) for
# n = 10**(i+1) grid points.
acc = np.zeros((7, 3))
for i in range(7):
    acc[i, :] = error(10**(i + 1))

# Problem sizes 10**1 .. 10**7, matching the rows of ``acc``.
x = 10**np.arange(1, 8, 1)
f, ax = plt.subplots()
ax.plot(x, acc[:, 0], label='numpy.inner')
ax.plot(x, acc[:, 1], label='scipy.linalg.blas.ddot')
ax.plot(x, acc[:, 2], label='numpy.trapz')
ax.set(xscale='log', yscale='log',
       title=r'$\langle \exp(x), \exp(x) \rangle$',
       ylabel='Relative Error', xlabel='n')
plt.legend()