python跑得慢_为什么我的smo跑得这么慢?

我用python实现了一个smo算法。因为我只是作为一种实践,所以我没有使用像numpy和scipy这样的科学计算库。我只希望它能正常工作。但是当我在diabetes上测试我的代码时,它会持续运行一周!我检查了我的代码很多次,也发现了一些错误。但是在我纠正这些错误之后,代码仍然运行得太慢。我不知道是否有一些我没有检查出的错误,或者smo本身就是这么慢。在

那么,有没有一些常见的错误会使代码运行缓慢呢?我编写的程序引用了smo paper的伪代码

非常感谢。在

下面是我的代码。在#encoding=utf8

import math

import random

class SVM(object):

def __init__(self, dataset, target, C=0.001, tolerance=0.001):

self.dataset=dataset

self.target=target

self.C=C

self.tolerance=tolerance

self.alpha=[0.0 for i in range(len(dataset))]

self.E={}

self.b=0.0

self.w=[0.0 for i in range(len(dataset[0]))]

def train(self):

numChanged=0

exampleAll=1

trainset_size=len(self.dataset)

iter=0

while numChanged > 0 or exampleAll:

numChanged=0

if exampleAll:

for i in range(trainset_size):

numChanged+=self.examineExample(i)

iter+=1

else:

for i in range(trainset_size):

if self.alpha[i] > 0 and self.alpha[i] < self.C:

numChanged+=self.examineExample(i)

iter+=1

if exampleAll:

exampleAll=0

elif numChanged == 0:

exampleAll=1

print "iter", iter

print "alpha", "\t".join([str(i) for i in self.alpha])

print "target", "\t".join(self.target)

for j in range(len(self.trainset[0])):

for i in range(trainset_size):

self.w[j] +=self.alpha[i]*int(self.target[i])*float(self.dataset[i][j])

def examineExample(self, i2):

print "in examineExample", i2

print "alpha", "\t".join([str(i) for i in self.alpha])

alpha2=self.alpha[i2]

y2=int(self.target[i2])

e2=self.calculateE(i2)

r2=e2*y2

print "r2", r2

if r2 < -self.tolerance and self.alpha[i2] < self.C or r2 > self.tolerance and self.alpha[i2] > 0: #i2违反了kkt条件

i1=self.select_i1(i2,e2)

if self.takeStep(i1, i2):

return 1

else:

all_sample_index=[i for i in range(len(self.dataset)) ]

random.shuffle(all_sample_index)

for k in range(len(all_sample_index)):

i1=all_sample_index[k]

if self.alpha[i1] > 0 and self.alpha[i1] < self.C:

if self.takeStep(i1, i2):

return 1

random.shuffle(all_sample_index)

for k in range(len(all_sample_index)):

i1=all_sample_index[k]

if self.takeStep(i1,i2):

return 1

return 0

def takeStep(self, i1, i2):

print "in takeStep", i1, i2

if i1==i2:

return 0

alpha1=self.alpha[i1]

y1=int(self.target[i1])

e1=self.calculateE(i1)

alpha2=self.alpha[i2]

y2=int(self.target[i2])

e2=self.calculateE(i2)

s=y1*y2

if y1 != y2:

L=max(0, alpha2-alpha1)

H=min(self.C, self.C+alpha2-alpha1)

if y1== y2:

L=max(0, alpha2+alpha1-self.C)

H=min(self.C, alpha2+alpha1)

if L==H:

return 0

k11=self.kernel(i1, i1)

k12=self.kernel(i1, i2)

k22=self.kernel(i2, i2)

eta=k11+k22-2*k12

if eta > 0:

self.alpha[i2]=alpha2+y2*(e1-e2)/eta

if self.alpha[i2] < L:

self.alpha[i2]=L

if self.alpha[i2] >H:

self.alpha[i2]=H

print "abs", abs(self.alpha[i2] - alpha2)

if abs(self.alpha[i2] - alpha2) < 0.00001

return 0

self.alpha[i1]=alpha1+s*(alpha2-self.alpha[i2])

b1=self.b-e1-y1*(self.alpha[i1]-alpha1)*self.kernel(i1,i1)-y2*(self.alpha[i2]-alpha2)*self.kernel(i1,i2)

b2=self.b-e2-y1*(self.alpha[i1]-alpha1)*self.kernel(i1,i2)-y2*(self.alpha[i2]-alpha2)*self.kernel(i2,i2)

print "two old alpha", alpha1, alpha2

print "two alpha", self.alpha[i1] ,self.alpha[i2]

if self.alpha[i1] >0 and self.alpha[i1] < self.C and self.alpha[i2] > 0 and self.alpha[i2] < self.C:

print "two b", b1, b2

if self.alpha[i1] >0 and self.alpha[i1] < self.C:

self.b=b1

elif self.alpha[i2] > 0 and self.alpha[i2] < self.C:

self.b=b2

else:

self.b=(b1+b2)/2

self.E[i2]=self.calculateE(i2)

self.E[i1]=self.calculateE(i1)

return 1

else:

return 0

def select_i1(self, i, Ei ):

maxK=-1;

maxDeltaE=0.0

Ej=0

self.E[i]=Ei

for k in range(len(self.dataset)):

if self.alpha[k] > 0 and self.alpha[k] < self.C:

Ek=self.calculateE(k)

deltaE=Ek-Ei

if abs(deltaE) > maxDeltaE:

maxK=k

maxDeltaE=deltaE

Ej=Ek

if maxK != -1:

return maxK

else:

j=i

while j == i:

j=random.randint(0, len(self.dataset))

return j

def calculateE(self, i):

f_x=0.0

trainset_size=len(self.dataset)

for k in range(trainset_size):

f_x+=(self.alpha[k]*int(self.target[k])*self.kernel(k,i))

f_x+=self.b

e_x=f_x-float(self.target[i])

return e_x

def kernel(self, i, j):

return sum([float(self.dataset[i][k])*float(self.dataset[j][k]) for k in range(len(self.dataset[i]))])

def test(self, testset, testset_target):

precision=0.0

correct=0

for k in range(len(testset)):

sample =testset[k]

pred_value=0.0

for i in range(len(sample)):

pred_value+=self.w[i]*sample[i]

pred_value+=self.b

if pred_value >= 0:

label=1

else:

label=-1

if testset_target[k] == label:

correct+=1

precision=correct/(float(len(testset_target)))

return precision

def read_libsvm_format_file(dataset_filename):

dataset_file=file(dataset_filename,'r')

dataset_label=[]

dataset=[]

for line in dataset_file:

splitted=line.strip().split()

dataset_label.append(splitted[0])

sample=[]

for i in range(1,len(splitted)):

index_value=splitted[i].split(":")

sample.append(index_value[1])

dataset.append(sample)

return dataset, dataset_label

if __name__ == "__main__":

dataset, target =read_libsvm_format_file('diabetes')

trainset_size=500

index=range(len(dataset))

random.shuffle(index)

trainset=[ dataset[index[i]] for i in range(trainset_size) ]

trainset_target=[ target[index[i]] for i in range(trainset_size) ]

testset=[ dataset[index[i]] for i in range(trainset_size, len(index)) ]

testset_target=[ target[index[i]] for i in range(trainset_size, len(index)) ]

svm=SVM(dataset, target)

svm.train()

  • 0
    点赞
  • 0
    收藏
    觉得还不错? 一键收藏
  • 0
    评论
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值