c和python哪个快_为什么Python比LAPACK和C更快

我最近需要从一些C代码中计算SVD。考虑到LAPACK的稳定性和广泛的接受度,我决定使用它。代码的运行速度似乎比我想象的要慢得多。我一直相信Python和Numpy通过LAPACK计算SVD,所以我决定将速度与Python进行比较。令我惊讶的是,Python代码在计算SVD时比C代码快得多。事实上,Python代码可以计算一个完整的SVD,其速度与C/LAPACK代码计算SVD的一半差不多。在

这是我第一次从C调用Fortran,所以我想我做错了什么导致了速度减慢,但我不知道是什么原因。在Question: Why is the Python code so much faster than the C code?

最后,我需要C语言中更快的SVD,所以接下来的问题是:Question: Is there a stable performant C library for computing partial SVD (preferrably of sparse matrices).

下面是一些计时测试的最小代码。所使用的矩阵没有任何特殊的含义,我只需要一些可以用C和Python轻松生成的东西。在

测试#include

#include

#include

#include

#include

extern void dgesvdx(

char* jobu,

char* jobv,

char* range,

int* m,

int* n,

double* a,

int* lda,

double* vl,

double* vu,

int* il,

int* iu,

int* ns,

double* s,

double* u,

int* ldu,

double* vt,

int* ldvt,

double* work,

int* lwork,

int* iwork,

int* info);

extern void dgesdd(

char* jobz,

int* m,

int* n,

double* a,

int* lda,

double* s,

double* u,

int* ldu,

double* vt,

int* ldvt,

double* work,

int* lwork,

int* iwork,

int* info);

typedef enum bool {false, true} bool;

bool svd(double* a,

int num_rows,

int num_cols,

int num_sing_vals,

double* u,

double* s,

double* vt,

bool use_dgesdd) {

double* a_work;

int* iwork;

double* work;

int lwork;

int min_m_n;

int info = 0;

double vl = 0;

double vu = 1;

int il = 1;

int iu = num_sing_vals;

int ns = num_sing_vals;

//

// Compute and allocate optimal workspace

//

min_m_n = (num_cols < num_rows) ? num_cols : num_rows;

lwork = -1;

// copy a to prevent corruption

a_work = (double*)malloc(num_cols*num_rows*sizeof(double));

// compute optimal workspace (lwork)

double worksize;

if(use_dgesdd) {

iwork = (int*)malloc(7*(min_m_n)*sizeof(int));

dgesdd("S",&num_cols,&num_rows,a_work,&num_cols,s,vt,&num_cols,u,&num_sing_vals,&worksize,&lwork,iwork,&info);

} else {

iwork = (int*)malloc(24*(min_m_n)*sizeof(int));

dgesvdx("V","V","I",&num_cols,&num_rows,a_work,&num_cols,&vl,&vu,&il,&iu,&ns,s,vt,&num_cols,u,&num_sing_vals,&worksize,&lwork,iwork,&info);

}

if(info) {

perror("error computing lwork\n");

return(false);

}

// allocate work

lwork = (int)worksize;

work = (double*)malloc(lwork*sizeof(double));

//

// Compute the svd

//

memcpy(a_work,a,num_cols*num_rows*sizeof(double));

if(use_dgesdd) {

dgesdd("S",&num_cols,&num_rows,a_work,&num_cols,s,vt,&num_cols,u,&num_sing_vals,work,&lwork,iwork,&info);

} else {

ns = num_sing_vals;

dgesvdx("V","V","I",&num_cols,&num_rows,a_work,&num_cols,&vl,&vu,&il,&iu,&ns,s,vt,&num_cols,u,&num_sing_vals,work,&lwork,iwork,&info);

}

if(info<0) {

perror("invalid argument in SVD\n");

return(false);

} else if(info>0) {

printf("SVD did not converge");

return(false);

}

free(iwork);

iwork = NULL;

free(work);

work = NULL;

free(a_work);

a_work = NULL;

return(true);

}

int main(int argc, char** argv) {

int num_rows = 1000;

int num_cols = 1000;

int size = num_rows*num_cols;

double* A = (double*)calloc(num_rows*num_cols,sizeof(double));

for(int r = 0; r < num_rows; ++r) {

for(int c = 0; c < num_cols; ++c) {

A[r*num_cols+c] = sin(5*M_PI*(r*num_cols+c)/((double)size))+cos(10*M_PI*(c*num_rows+c)/((double)size));

if(fabs(A[r*num_cols+c])<0.75 || fabs(A[r*num_cols+c]) > 0.9) {

A[r*num_cols+c] = 0;

}

}

}

int num_sing_vals = (num_rows < num_cols) ? num_rows : num_cols;

double* u_sdd = (double*)calloc(num_rows*num_sing_vals,sizeof(double));

double* s_sdd = (double*)calloc(num_sing_vals,sizeof(double));

double* vt_sdd = (double*)calloc(num_sing_vals*num_cols,sizeof(double));

clock_t tic = clock();

bool success = svd(A,num_rows,num_cols,num_sing_vals,u_sdd,s_sdd,vt_sdd,true);

clock_t toc = clock();

printf("full dgesdd %lf seconds\n",(double)(toc-tic)/CLOCKS_PER_SEC);

double* u_svd = (double*)calloc(num_rows*num_sing_vals,sizeof(double));

double* s_svd = (double*)calloc(num_sing_vals,sizeof(double));

double* vt_svd = (double*)calloc(num_sing_vals*num_cols,sizeof(double));

tic = clock();

success = svd(A,num_rows,num_cols,num_sing_vals,u_svd,s_svd,vt_svd,false);

toc = clock();

printf("full dgesvdx %lf seconds\n",(double)(toc-tic)/CLOCKS_PER_SEC);

int first_few_sv = 200;

double* u_partial_svd = (double*)calloc(num_rows*first_few_sv,sizeof(double));

double* s_partial_svd = (double*)calloc(num_sing_vals,sizeof(double));

double* vt_partial_svd = (double*)calloc(first_few_sv*num_cols,sizeof(double));

tic = clock();

success = svd(A,num_rows,num_cols,first_few_sv,u_partial_svd,s_partial_svd,vt_partial_svd,false);

toc= clock();

printf("partial dgesvdx %lf seconds\n",(double)(toc-tic)/CLOCKS_PER_SEC);

free(A);

free(u_sdd);

free(s_sdd);

free(vt_sdd);

free(u_svd);

free(s_svd);

free(vt_svd);

free(u_partial_svd);

free(s_partial_svd);

free(vt_partial_svd);

return 0;

}

编译使用:

^{pr2}$

这为我提供了以下定时输出:full dgesdd 8.003578 seconds

full dgesvdx 17.077815 seconds

partial dgesvdx 1.498775 seconds

与以下python代码比较:

测试_高级副总裁import numpy, time

dim = 1000

term1 = numpy.sin((5*numpy.pi*numpy.arange(dim**2).reshape(dim,dim))/(dim**2))

term2 = numpy.cos((10*numpy.pi*numpy.arange(dim**2).reshape(dim,dim).T)/(dim**2))

A = term1+term2

A = numpy.where(numpy.abs(A)<0.75,0,A)

A = numpy.where(numpy.abs(A)>0.9,0,A)

tic = time.time()

U,S,Vt = numpy.linalg.svd(A)

toc = time.time()

print "full svd", toc - tic, "seconds"

给出:python2 test_svd.py

full svd 2.4460 seconds

实际的singluar值一致。然而,Python中完整的SVD在时间上可以与C中的部分SVD相比,这一事实让我感到困惑。在

我的python是用gcc6.2.1编译的python2.7.12,GCC是6.2.1

  • 0
    点赞
  • 0
    收藏
    觉得还不错? 一键收藏
  • 0
    评论
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值