# Logistic regression (逻辑斯蒂回归)

import math  # math.exp for the sigmoid, math.log10 for the cost function

# Module-level state shared by every function below.
feature=[]  # one list per record: 36 board attributes mapped to ASCII codes
result=[]  # 0/1 class label per record ('won' -> 1, anything else -> 0)
theta=[]  # model weights: theta[0] intercept + 36 feature weights
tempfeature=[]  # NOTE(review): only ever rebound as a LOCAL in getDataFromFile; this global is never used
test_result=[]  # predicted 0/1 label per record, filled by getTestResult
def getDataFromFile():
    """Read 'kr-vs-kp.data.txt' into the global feature/result lists.

    Each data line is comma-separated: 36 single-character board
    attributes followed by the class label ('won' or 'nowin').  Every
    attribute character is mapped to its ASCII code; the label becomes
    1 for 'won' and 0 otherwise.

    Fixes in this revision:
      * blank/trailing lines are skipped up front instead of blindly
        popping the last two feature rows — the original popped only
        ``feature`` and never ``result``, leaving the two lists with
        different lengths;
      * the label is whitespace-stripped instead of sliced with
        ``[:-1]``, which mangled a final line lacking a newline;
      * ``tempfeature = feature`` rebound a local name with no effect
        and was dropped.
    """
    with open('kr-vs-kp.data.txt') as fileData:
        for line in fileData:
            cur = line.strip().split(',')
            if cur == ['']:
                continue  # blank line (e.g. trailing newline at EOF)
            feature.append([ord(item) for item in cur[:-1]])  # chars -> ASCII
            result.append(1 if cur[-1] == 'won' else 0)
#print(feature[0])
#print(result) #read data set from file
def feature_scale(feature_list):
    """Mean-normalize every record in-place: (x - mean) / (max - min + 1).

    The +1 in the denominator guards against division by zero when all
    values in a record are equal.  Note that scaling is applied per
    record (row), not per feature column.
    """
    for idx, record in enumerate(feature_list):
        mean = sum(record) / len(record)
        span = max(record) - min(record) + 1
        feature_list[idx] = [(value - mean) / span for value in record]
        
def inittheta(num_params=37):
    """Fill the global theta with `num_params` weights of 1.0.

    The default 37 = 1 intercept + 36 board features of the kr-vs-kp
    data set; generalized from the original hard-coded 37 so other
    feature counts can reuse it.
    """
    theta.extend([1.0] * num_params)
                             
def hypothesisOfLogisticRegression(theta_list, feature_record):
    """Sigmoid hypothesis h(x) = 1 / (1 + e^(-theta . x)).

    theta_list[0] is the intercept; theta_list[1:] pair positionally
    with feature_record.  Generalized from the original hard-coded
    36-feature loop to any record length (identical results for
    36-feature records).

    May raise OverflowError for a large negative linear term, exactly
    as the original did.
    """
    z = theta_list[0]
    for weight, value in zip(theta_list[1:], feature_record):
        z += weight * value
    return 1.0 / (1.0 + math.exp(-z))
#print(hypothesisOfLogisticRegression(theta, feature[0]))
#print(sum(feature[0]))
def calculteCostFunction(thetaList, featureList, resultList):
    """Average logistic-regression cost (cross-entropy with base-10 logs).

    Fixes in this revision:
      * the accumulator was reset to 0 INSIDE the loop, so only the
        last record contributed to the returned cost;
      * the record count was hard-coded to 3196 instead of using the
        actual list length;
      * the hypothesis was computed twice per record; it is now hoisted.
    """
    total = 0.0
    for record, label in zip(featureList, resultList):
        h = hypothesisOfLogisticRegression(thetaList, record)
        total += label * math.log10(h) + (1 - label) * math.log10(1 - h)
    return -total / len(featureList)


def SGD():
    """Stochastic gradient descent on the global theta (lr = 0.01).

    Fixes in this revision:
      * ``temptheta = theta`` only ALIASED the weight list, so the
        intercept update at the end of each record used a hypothesis
        computed from a theta already modified by the j-loop; the
        error term is now computed once per record before any weight
        changes (also removing 36 redundant hypothesis calls/record);
      * record count taken from len(feature) instead of hard-coded 3196.
    Stops when the cost improvement drops below 1e-10.
    """
    prev_cost = 0.0
    cost = 1.0
    alpha = 0.01
    while cost - prev_cost > 0.0000000001:
        for i in range(len(feature)):
            if cost - prev_cost > 0.0000000001:
                cost = calculteCostFunction(theta, feature, result)
                print('the value of cost function is:', cost)
                # one record's error term, evaluated BEFORE updating theta
                err = hypothesisOfLogisticRegression(theta, feature[i]) - result[i]
                for j in range(1, 37):
                    theta[j] -= alpha * err * feature[i][j - 1]
                theta[0] -= alpha * err
                prev_cost = calculteCostFunction(theta, feature, result)
                print('new cost is ', prev_cost)
            else:
                print("Find the optimal theta")
                    
def GD():
    """Batch gradient descent on the global theta (lr = 0.01).

    Fixes in this revision:
      * ``sumValue`` was never reset between while-iterations, so
        gradient sums from earlier passes leaked into every later
        update; the gradient vector is now rebuilt from zero each pass;
      * ``temp2theta = theta`` aliased the weight list, so gradient
        sums for later weights saw a partially updated theta; the full
        batch gradient is now computed before any weight changes;
      * record count taken from len(feature) instead of hard-coded 3196.
    Stops when the cost improvement drops below 1e-5.
    """
    prev_cost = 0.0
    cost = 1.0
    alpha = 0.01
    while cost - prev_cost > 0.00001:
        cost = calculteCostFunction(theta, feature, result)
        print('Cost is', cost)
        grad = [0.0] * 37  # fresh gradient accumulator every pass
        for record, label in zip(feature, result):
            err = hypothesisOfLogisticRegression(theta, record) - label
            grad[0] += err
            for j in range(1, 37):
                grad[j] += err * record[j - 1]
        for j in range(37):
            theta[j] -= alpha * grad[j]
        prev_cost = calculteCostFunction(theta, feature, result)
        print('new cost is ', prev_cost)
    print('Find optimal theta ', theta)
    print('cost is', prev_cost)
    
def getTestResult():
    """Predict a 0/1 label for every record in the global feature list.

    A record is labelled 1 when the hypothesis is at least 0.5 and 0
    otherwise; predictions are appended to the global test_result.
    """
    for record in feature:
        prediction = hypothesisOfLogisticRegression(theta, record)
        test_result.append(1 if prediction >= 0.5 else 0)

                  

def TestLRClassifier():
    """Print training-set accuracy: predictions vs. true labels.

    Fixes in this revision:
      * the final print was indented by 3 spaces — an IndentationError
        that prevented the file from running at all;
      * the record count was hard-coded to 3196; the prediction/label
        pairs are now iterated directly.
    """
    correct_count = 0
    wrong_count = 0
    for predicted, actual in zip(test_result, result):
        if predicted == actual:
            correct_count += 1
        else:
            wrong_count += 1
    print('the correct proportion is ', correct_count / len(feature))
    print('the wrong proportion is ', wrong_count / len(feature))
    
# Training pipeline: load and label the data, initialize weights, scale
# the records, train with batch gradient descent, then evaluate on the
# training set itself (no train/test split).
getDataFromFile()
inittheta()
#print(theta)
feature_scale(feature) 
hypothesisOfLogisticRegression(theta, feature[0])         # NOTE(review): return value discarded — smoke check only
calculteCostFunction(theta, feature, result)  # NOTE(review): return value discarded
GD()
#print('finally',theta)
getTestResult()
TestLRClassifier()          
        

  • 0
    点赞
  • 0
    收藏
    觉得还不错? 一键收藏
  • 0
    评论

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值