第一部分:转换数据——把一个文件夹下的所有文件合并写入一个大文件
import os
import pandas as pd
import sys
# This module converts the raw result files into one combined table.
# Module-level accumulator: every row collected by readfile() ends up here.
DATA = pd.DataFrame()


def readfile(filepath, outpath="D:\\SwarmData\\trans_result\\trans.csv"):
    """Merge every data file found under ``filepath`` into one CSV file.

    ``filepath`` is scanned for sub-directories and every file inside them is
    read.  For each file, the first field of line 1 is parsed as
    ``<label>:<number> ...`` and the number becomes the ``swarmsize`` column;
    line 2 is used as the column header, from which ``step_index``, ``gap``
    and ``r`` are kept.

    Args:
        filepath: Root directory to scan.
        outpath: Destination CSV file.  Defaults to the location that used
            to be hard-coded in the function body.

    Side effects:
        Prints the sorted listing of ``filepath``, appends the collected rows
        to the module-level ``DATA`` frame and writes ``DATA`` to ``outpath``.
    """
    global DATA

    entries = sorted(os.listdir(filepath))
    print(entries)

    frames = []
    for entry in entries:
        # os.path.join keeps the code independent of the path separator.
        subdir = os.path.join(filepath, entry)
        if not os.path.isdir(subdir):
            # A stray file in the root cannot be listed; skip it.
            continue
        for name in sorted(os.listdir(subdir)):
            csv_path = os.path.join(subdir, name)

            # Line 1 carries the swarm size as "<label>:<value> ...".
            banner = pd.read_csv(csv_path, nrows=0)
            first_field = str(banner.columns[0])
            size = float(first_field.split(" ")[0].split(":")[1])

            # Line 2 is the real header row of the table.
            table = pd.read_csv(csv_path, header=1)
            table = table[['step_index', 'gap', 'r']].assign(swarmsize=size)
            frames.append(table[['swarmsize', 'step_index', 'gap', 'r']])

    if frames:
        if not DATA.empty:
            # Keep rows gathered by earlier calls in front of the new ones.
            frames.insert(0, DATA)
        # One concat at the end instead of one per file.
        DATA = pd.concat(frames, axis=0, ignore_index=True)

    DATA.to_csv(outpath)  # write the combined table to disk
# Input directory: the first command-line argument when one is given,
# otherwise the default results folder.
filepath = sys.argv[1] if len(sys.argv) > 1 else "D:\\SwarmData\\result"
print(filepath)
readfile(filepath)
第二部分:从大文件中挑选出需要处理的数据
import pandas as pd
import os
import sys
# This module picks the rows with step_index == 250 out of the converted data.
pickfilepath = "D:\\SwarmData\\trans_result\\trans_step250.csv"
readfilepath = "D:\\SwarmData\\trans_result\\trans_step.csv"


def pickfile(pickfilepath, sourcepath=None):
    """Copy the intermediate CSV to ``pickfilepath`` with a fresh row index.

    The source file is read, its saved index column (``'Unnamed: 0'``) is
    dropped, and the table is written again, so the output carries a new
    consecutive index column.

    Args:
        pickfilepath: Destination CSV file.
        sourcepath: CSV file to read.  Defaults to the module-level
            ``readfilepath``.

    Raises:
        KeyError: If the source file has no ``'Unnamed: 0'`` column.
    """
    if sourcepath is None:
        sourcepath = readfilepath
    picked = pd.read_csv(sourcepath).drop('Unnamed: 0', axis=1)
    picked.to_csv(pickfilepath)
def readfile(filepath):
    """Extract the rows with ``step_index == 250`` from the combined CSV.

    The filtered rows (without the saved ``'Unnamed: 0'`` index column) are
    written to the module-level ``readfilepath``; ``pickfile`` is then called
    to produce the file at ``pickfilepath``.

    Args:
        filepath: CSV file to read.

    Raises:
        KeyError: If the file lacks a ``step_index`` or ``'Unnamed: 0'``
            column.
    """
    table = pd.read_csv(filepath)
    # drop() returns a new frame; dropping in place on the filtered slice
    # can trigger pandas' SettingWithCopyWarning.
    step_rows = table.loc[table['step_index'] == 250].drop('Unnamed: 0', axis=1)
    step_rows.to_csv(readfilepath)
    pickfile(pickfilepath)
# Input file: the first command-line argument when one is given, otherwise
# the combined table at its default location.
filepath = (
    sys.argv[1] if len(sys.argv) > 1
    else "D:\\SwarmData\\trans_result\\trans.csv"
)
print(filepath)
readfile(filepath)
第三部分:计算回归系数、显示拟合平面,并用数据测试拟合效果
import pandas as pd
import matplotlib.pyplot as plt
import os
import sys
import numpy as np
from mpl_toolkits.mplot3d import Axes3D
from matplotlib import cm
# This module contains three functions:
#   calculate - read the data from a file and compute the coefficient matrix
#   show      - display the fitted plane
#   testfunc  - measure how well the fit matches the data
# Module-level state shared by those functions.
ws = np.asmatrix(np.array([]))  # regression coefficients, set by calculate()
data = pd.DataFrame()           # full table, including the 'const1' column
traindata = pd.DataFrame()      # rows left after removing the held-out ones
testdata = pd.DataFrame()       # randomly held-out rows
L = []                          # row positions of the held-out rows


def calculate(filepath, test_size=200):
    """Load the data and fit ``r = a*swarmsize + b*gap + c`` by least squares.

    The CSV is read into the module-level ``data`` (its ``'Unnamed: 0'``
    column is dropped and a constant column ``const1 = 1`` is added).  Up to
    ``test_size`` distinct random rows are held out into ``testdata``; their
    positions are stored in ``L`` and the remaining rows in ``traindata``.
    These three are recomputed on every call.  The coefficients are fitted
    on the full ``data`` table, not on ``traindata``, and stored in ``ws``
    as a 3x1 matrix ordered (swarmsize, gap, const1).

    Args:
        filepath: CSV file with columns ``'Unnamed: 0'``, ``swarmsize``,
            ``gap`` and ``r``.
        test_size: Number of rows to hold out; capped at the row count.

    Returns:
        The 3x1 coefficient matrix, or ``None`` when the normal equations
        are singular (``ws`` is left unchanged in that case).
    """
    global data, traindata, testdata, ws

    data = pd.read_csv(filepath).drop('Unnamed: 0', axis=1)
    data['const1'] = 1

    # Hold out distinct rows by position; a permutation cannot repeat a row
    # and works for any table length.
    n_rows = len(data)
    n_held = max(0, min(test_size, n_rows))
    held_out = np.random.permutation(n_rows)[:n_held].tolist()
    L[:] = held_out  # mutate in place so existing references see the update
    testdata = data.iloc[held_out].reset_index(drop=True)
    traindata = data.drop(data.index[held_out])

    # .values replaces DataFrame.as_matrix(), which pandas 1.0 removed.
    xMat = data[['swarmsize', 'gap', 'const1']].values.astype(float)
    yMat = data[['r']].values.astype(float)

    # X^T X is singular exactly when X lacks full column rank; a rank test
    # is more reliable than comparing a floating-point determinant to 0.0.
    n_coef = xMat.shape[1]
    if n_rows < n_coef or np.linalg.matrix_rank(xMat) < n_coef:
        print("This matrix is singular, cannot do inverse")
        return None

    xTx = np.dot(xMat.T, xMat)
    # Solve the normal equations directly instead of forming the inverse.
    ws = np.asmatrix(np.linalg.solve(xTx, np.dot(xMat.T, yMat)))
    print(ws)
    print(ws.shape)
    return ws
def show():
    """Plot the data points and the fitted plane in a 3-D figure.

    Reads the module-level ``data`` and ``ws``.  Rows are scattered per swarm
    size (60 to 200 in steps of 20), each size in its own colour, with ``gap``
    on the x axis, ``swarmsize`` on the y axis and ``r`` on the z axis.  The
    plane ``ws[1]*gap + ws[0]*swarmsize + ws[2]`` is drawn over the grid
    gap 0..99, swarmsize 60..199, and the figure is shown with ``plt.show()``.
    """
    # (swarm size, marker colour) in plotting / legend order.
    groups = [
        (60, 'b'),
        (80, 'g'),
        (100, 'c'),
        (120, 'y'),
        (140, 'k'),
        (160, 'slategrey'),
        (180, 'm'),
        (200, 'r'),
    ]

    ax = plt.axes(projection='3d')
    ax.set_xlim(0, 100)
    ax.set_ylim(60, 200)
    ax.set_zlim(0, 320)
    ax.set_title('step_index=250', fontsize=15)
    ax.set_xlabel('gap', fontsize=15)
    ax.set_ylabel('swarmsize', fontsize=15)
    ax.set_zlabel('r', fontsize=15)

    for size, colour in groups:
        subset = data.loc[data['swarmsize'] == size]
        ax.scatter3D(
            np.array(subset['gap']),
            np.array(subset['swarmsize']),
            np.array(subset['r']),
            s=5,
            color=colour,
            label='size:%d' % size,
        )
    ax.legend(loc='best')

    # Turn the coordinate vectors into coordinate matrices: one column per
    # gap value, one row per swarm size.
    X, Y = np.meshgrid(np.arange(0, 100, 1), np.arange(60, 200, 1))
    # ws[i, 0] yields a scalar for both np.matrix and 2-D ndarray, avoiding
    # the conversion of a 1x1 matrix to float.
    Z = float(ws[1, 0]) * X + float(ws[0, 0]) * Y + float(ws[2, 0])
    ax.plot_surface(X, Y, Z, rstride=1, cstride=1, linewidth=0, color='w')
    plt.show()
def testfunc():
    """Print the mean and variance of the absolute fit error per swarm size.

    Reads the module-level ``data`` and ``ws``.  For every swarm size from 60
    to 200 in steps of 20, the rows of ``data`` with that size are predicted
    with ``ws`` and the absolute difference to the recorded ``r`` is
    summarised.  The evaluation runs on the full ``data`` table, not on the
    held-out ``testdata``.
    """
    def report(size):
        # Summarise |r - prediction| over the rows with this swarm size.
        subset = data.loc[data['swarmsize'] == size]
        # .values replaces DataFrame.as_matrix(), which pandas 1.0 removed.
        features = subset[['swarmsize', 'gap', 'const1']].values
        actual = subset['r'].values.astype(float)
        predicted = np.asarray(np.dot(features, ws), dtype=float).ravel()
        abs_err = np.abs(actual - predicted)
        mean = np.mean(abs_err)
        var = np.var(abs_err)
        print('swarmsize', size, ': 误差期望:', mean, ' 误差方差:', var)

    for size in range(60, 201, 20):
        report(size)
# Input file: the first command-line argument when one is given, otherwise
# the step-250 table at its default location.
filepath = (
    sys.argv[1] if len(sys.argv) > 1
    else "D:\\SwarmData\\trans_result\\trans_step250.csv"
)
print(filepath)

# Fit the coefficients, draw the plane, then report the fit error.
calculate(filepath)
show()
testfunc()
拟合效果图