Python 的几种高性能计算方法

最新推荐文章于 2024-08-07 04:12:23 发布

Eric An

最新推荐文章于 2024-08-07 04:12:23 发布

阅读量1.4k

点赞数

分类专栏：数据专题《大数据异常检测与推荐系统》

本文链接：https://blog.csdn.net/yunxinan/article/details/101143073

版权

数据专题《大数据异常检测与推荐系统》专栏收录该内容

83 篇文章 0 订阅

订阅专栏

1、Python 使用 NumPy 进行计算时，如果图像处理等大量运算导致并行计算性能降低，我们可以使用 Numba 科学计算库来提高计算性能：

#安装命令
conda install numba
#代码对比
import time
import pandas as pd


def time_com(i):
    """Sum ``(test * ind) % 3`` over every pair drawn from ``range(i)``.

    Pure-Python baseline for the Numba comparison: a deliberately naive
    nested loop with no vectorisation.

    Args:
        i: Exclusive upper bound used by both the outer and inner loop.

    Returns:
        The accumulated sum (0 when ``i`` is 0 or negative, because
        ``range(i)`` is then empty).
    """
    cum = 0
    for test in range(i):
        for ind in range(i):
            cum += (test * ind) % 3
    # Return the result so the work is observable to callers and tests.
    return cum


if __name__ == '__main__':
    # time.clock() was removed in Python 3.8; perf_counter() is the
    # high-resolution replacement for measuring intervals.
    t1 = time.perf_counter()
    for i in range(500):
        time_com(i)
    t2 = time.perf_counter()
    print("run time:%f s" % (t2 - t1))

import time
import pandas as pd
from numba import jit


# nopython=True asks Numba to compile the whole function to machine code
# and fail loudly instead of falling back to the slow object mode.
@jit(nopython=True)
def time_com(i):
    """Sum ``(test * ind) % 3`` over every pair drawn from ``range(i)``.

    Same nested loop as the pure-Python baseline, compiled with Numba.

    Args:
        i: Exclusive upper bound used by both the outer and inner loop.

    Returns:
        The accumulated sum (0 when ``range(i)`` is empty).
    """
    cum = 0
    for test in range(i):
        for ind in range(i):
            cum += (test * ind) % 3
    # Return the result so the loop has an observable effect.
    return cum


if __name__ == '__main__':
    # time.clock() was removed in Python 3.8; perf_counter() is the
    # high-resolution replacement for measuring intervals.
    # NOTE: the first call triggers JIT compilation, so the measured time
    # includes that one-off cost.
    t1 = time.perf_counter()
    for i in range(500):
        time_com(i)
    t2 = time.perf_counter()
    print('run time:%f s' % (t2 - t1))

运行时间对比：第一段 run time：7.7149 s；第二段 run time：0.1909 s

2、Python 的两种提速思路一般基于 JIT 技术，下面通过 CPython 和 PyPy 两种实现进行实际对比：

# Tips: copy from hello_gpu.py in the package
import time


def list_function(size=100000, repeats=100):
    """Benchmark element-wise addition of two ranges using plain lists.

    Every pairwise sum is appended to one growing list, and the list is
    summed at the end. The append loop is kept on purpose: it is the
    workload being timed.

    Args:
        size: Length of each of the two ranges that are zipped together.
        repeats: Number of passes made over the zipped ranges.

    Returns:
        The sum of all appended values, i.e.
        ``repeats * sum(2 * k for k in range(size))``.
    """
    list_1 = range(size)
    list_2 = range(size)
    result_list = list()
    for _ in range(repeats):
        for (a, b) in zip(list_1, list_2):
            result_list.append(a + b)
    return sum(result_list)


# Guarded so that importing this module does not run the benchmark.
if __name__ == '__main__':
    start = time.time()
    print(list_function())
    print('time elapse', time.time() - start)

import time
import pandas as pd

def list_function(size=100000, repeats=100):
    """Benchmark element-wise addition of two ranges using plain lists.

    Every pairwise sum is appended to one growing list, and the list is
    summed at the end. The append loop is kept on purpose: it is the
    workload being timed.

    Args:
        size: Length of each of the two ranges that are zipped together.
        repeats: Number of passes made over the zipped ranges.

    Returns:
        The sum of all appended values, i.e.
        ``repeats * sum(2 * k for k in range(size))``.
    """
    list_1 = range(size)
    list_2 = range(size)
    result_list = list()
    for _ in range(repeats):
        for (a, b) in zip(list_1, list_2):
            result_list.append(a + b)
    return sum(result_list)

def df_function(size=100000, repeats=100):
    """Benchmark the same addition using two pandas DataFrame columns.

    Columns ``a`` and ``b`` both hold ``range(size)``. On every pass the
    columns are added together and the result is totalled with the
    built-in ``sum()`` (not ``Series.sum()``), matching the original
    benchmark.

    Args:
        size: Number of rows in each column.
        repeats: Number of times the column sum is accumulated.

    Returns:
        The accumulated total, i.e.
        ``repeats * sum(2 * k for k in range(size))``.
    """
    df = pd.DataFrame()
    df['a'] = range(size)
    df['b'] = range(size)
    accu = 0
    for _ in range(repeats):
        accu += sum(df['a'] + df['b'])
    return accu
# Guarded so that importing this module does not run the benchmark.
if __name__ == '__main__':
    # A single timer covers both benchmarks, so the printed figure is the
    # combined wall-clock time of the list and the DataFrame versions.
    start = time.time()

    print(list_function())
    print(df_function())
    print('time elapse', time.time() - start)

两段代码对比：第一段用时 3.5 s，第二段用时 5.5 s。

3、Python 调用 PyCUDA 实现并行计算：

import pycuda.autoinit
import pycuda.driver as drv
import numpy

from pycuda.compiler import SourceModule

# CUDA kernel: each thread multiplies one pair of elements, indexed by its
# position within the block (threadIdx.x).
mod = SourceModule("""
__global__ void multiply_them(float *dest, float *a, float *b)
{
    const int i = threadIdx.x;
    dest[i] = a[i] * b[i];
}
""")

# The name passed to get_function() must match the kernel name in the
# CUDA source above.
multiply_them = mod.get_function('multiply_them')

a = numpy.random.randn(400).astype(numpy.float32)
b = numpy.random.randn(400).astype(numpy.float32)

dest = numpy.zeros_like(a)
# One block of 400 threads, matching the 400-element arrays. drv.In and
# drv.Out wrap the host arrays that are passed as kernel arguments.
multiply_them(
    drv.Out(dest), drv.In(a), drv.In(b),
    block=(400, 1, 1), grid=(1, 1))

# Difference between the GPU result and the NumPy result; should be all zeros.
print(dest - a * b)