数据分析总结
本文基于给定的定义对数据进行筛选和可视化分析,
不涉及数据挖掘算法。
1. 数据的采集
在导出原始数据前,先要观察原始数据的结构,大致确定筛选的主方向。此过程极为重要:筛选的主方向如果一开始就确定对了,可以省去许多不必要的工作。
2. 原始数据的预处理
提取所需要的原始数据,将异常值替换为均值,并根据需要进行降噪(平滑处理)。
# Extract the centre-point and tail-base angular velocities plus the moved
# distance from the raw export.
import numpy as np
import pandas as pd

# Not used anywhere in the visible part of the script; kept in case code
# outside this section relies on the name.
x_pre = []
excel_path = r'C:\Users\15643\Desktop\analysis\Export Files\row_data4-1.xls'
# read_excel already returns a DataFrame, so no extra wrapping is needed.
df = pd.read_excel(excel_path)
# '-' cells become NaN so they are handled as missing values
# (presumably '-' is how the export marks a missing sample -- confirm).
df.replace('-', np.nan, inplace=True)
# NOTE(review): the original script called df.dropna(axis=0, how='any') at
# this point but discarded the returned frame, so no row was ever removed.
# That effective behaviour is preserved here; confirm whether rows containing
# NaN were really meant to be dropped before changing it.
# Remove the rows labelled 0..34 in a single call instead of 35 separate
# in-place drops.
df.drop(index=range(35), inplace=True)
# .copy() makes df_new an independent frame, so renaming its columns or
# assigning to them later does not operate on a slice of df.
df_new = df.iloc[:, [1, 14, 16, 17, 19]].copy()
df_new.columns = ['记录时间', '移动距离', '中心点角速度', '尾根角速度', '转角']
替换异常值(将角速度≥4000的值视为异常值)
# Samples whose instantaneous angular velocity is at or above 4000 are treated
# as outliers and overwritten with the mean of the centre-point angular
# velocity column.  The mean is taken before any replacement happens.
vel_mean = np.mean(df_new.iloc[:, 2])
df_new['中心点角速度'] = df_new['中心点角速度'].apply(
    lambda sample: sample if not sample >= 4000 else vel_mean
)

t = df_new['记录时间']
vel = df_new['中心点角速度']

import matplotlib.pyplot as plt

# Draw the cleaned trace on the current axes.
axes = plt.gca()
axes.plot(t, vel, "b.-")
axes.set_ylabel('vel')
axes.legend(['clean data'])
axes.grid(True)  # show grid lines
plt.show()

# Persist the cleaned table for the later steps.
df_new.to_excel(
    r'C:\Users\15643\Desktop\analysis\Export Files\clean_data4-1.xls',
    sheet_name='test',
)
降噪处理:使用窗口大小(window size)为10的滑动窗口,取每个窗口内的平均值,保存到新的list中作为平滑后的结果
# Smooth the signal columns with a sliding-window mean.
# read_excel already returns a DataFrame, so no extra wrapping is needed.
signal_row = pd.read_excel(r'C:\Users\15643\Desktop\analysis\Export Files\signal_clean_data4.xls')
import matplotlib.pyplot as plt

WINDOW = 10  # number of consecutive rows averaged per window

# The original loop iterated over len(signal_row_r), a name that is never
# defined; signal_row is the only frame loaded here, so its length is used.
# A window starts at every row i with i + WINDOW < len(signal_row), which
# yields len(signal_row) - WINDOW values (none when the table is too short).
n_windows = max(len(signal_row) - WINDOW, 0)

# Window i covers rows [i, i + WINDOW) and its mean is stored at position i.
# Column 4 feeds 'right_smooth', column 3 feeds 'left_smooth'.
signal_smooth_r = [
    np.mean(signal_row.iloc[i:i + WINDOW, 4]) for i in range(n_windows)
]
signal_smooth_l = [
    np.mean(signal_row.iloc[i:i + WINDOW, 3]) for i in range(n_windows)
]

# Keep only the leading rows that received a smoothed value (the original
# removed the last WINDOW rows one at a time); .copy() so the new columns are
# added to an independent frame.
signal_row = signal_row.iloc[:n_windows].copy()
signal_row['left_smooth'] = signal_smooth_l    # add column
signal_row['right_smooth'] = signal_smooth_r
signal_row.to_excel(r'C:\Users\15643\Desktop\analysis\Export Files\signal_clean_data_fin4.xls')
以下为预处理后的结果
执行筛选过程
# Find turning events, variant 2 (does not follow the definition exactly):
# cut a window around each large turn angle, then match it with the calcium
# signal and plot both.
import numpy as np
import pandas as pd
import xlwt
import math
import matplotlib.pyplot as plt  # plt is used below for the figures

# Elapsed time is converted to a row offset by dividing by this value
# (presumably the sampling period in seconds -- confirm against the export).
SAMPLE_PERIOD = 0.02
# Rows taken on each side of the candidate row; a full window is
# 2 * HALF_WINDOW + 1 rows.
HALF_WINDOW = 25

data = pd.read_excel(r'C:\Users\15643\Desktop\analysis\Export Files\clean_data4_smooth.xls', sheet_name='test')
data_signal = pd.read_excel(r'C:\Users\15643\Desktop\analysis\Export Files\signal_clean_data_fin4.xls')

# Candidate rows: turn angle of at least 60.
data_angle = data.loc[data['转角'] >= 60]
# Value of column 2 in the first row; used as the time origin below.
time0_df = data.iloc[0, 2]

# The original computed `dur = duration/0.02` here, before `duration` was
# ever assigned, which raises NameError on a fresh run; those two statements
# are removed because `dur` is recomputed for every candidate inside the loop.

# Range of column 6 over the WHOLE table.  The original recomputed this
# inside the loop, but it does not depend on the loop variable, so it is
# evaluated once.
# NOTE(review): this is taken from `data`, not from the per-event window
# `data_spe`; confirm whether the windowed range was intended.
data_ang = data.iloc[:, 6]
data_ang_max = np.max(data_ang)
data_ang_min = np.min(data_ang)
data_ang_fin = data_ang_max - data_ang_min

leng_data = len(data_angle)
# NOTE(review): the bound leng_data - 1 leaves out the last candidate row;
# kept as in the original -- confirm whether that is intentional.
for i in range(leng_data - 1):
    time0 = data_angle.iloc[i, 2]
    duration = time0 - time0_df
    # Round instead of truncating: a quotient such as 2.9999999999999996
    # would otherwise select the row before the intended one.
    dur = int(round(duration / SAMPLE_PERIOD))

    # A negative slice start would wrap around to the end of the table and
    # select the wrong rows (or none), so candidates too close to the start
    # are skipped explicitly.
    if dur < HALF_WINDOW:
        continue

    window = slice(dur - HALF_WINDOW, dur + HALF_WINDOW + 1)
    data_spe = data.iloc[window]
    data_signal_spe = data_signal.iloc[window]

    data_ang_vel = data_spe.iloc[:, 4]
    data_ang_vel_mean = np.mean(data_ang_vel)
    data_ang_vel_max = np.max(data_ang_vel)
    data_ang_vel_min = np.min(data_ang_vel)

    # Criterion 1: mean angular velocity and turn-angle range.
    if not (data_ang_vel_mean > 30 and data_ang_fin >= 60):
        continue
    # Criterion 2: the angular velocity takes values on both sides of 30
    # inside the window.
    if not (data_ang_vel_max > 30 and data_ang_vel_min < 30):
        continue
    # Criterion 3: use the mean moved distance of the centre point to filter
    # out false-positive turns made on the spot.
    data_distance = data_spe.iloc[:, 3]
    data_distance_mean = np.mean(data_distance)
    if not data_distance_mean > 0.03:
        continue

    t = data_spe["记录时间"]
    vel = data_spe["中心点角速度"]
    signal_l = data_signal_spe["left"]
    signal_r = data_signal_spe["right"]
    signal_l_smooth = data_signal_spe["left_smooth"]
    signal_r_smooth = data_signal_spe["right_smooth"]

    fig, ax1 = plt.subplots()
    ax2 = ax1.twinx()  # second y-axis sharing the same x-axis
    ax1.plot(t, vel, 'g.-')
    # In the format string 'p--', 'p' is matplotlib's pentagon marker, not a
    # colour.
    ax2.plot(t, signal_r, 'b.-', t, signal_r_smooth, 'r--', t, signal_l, 'y.-', t, signal_l_smooth, 'p--')
    ax2.legend(["right", "smooth_right", "left", "smooth_left"], loc='best')
    ax1.set_xlabel('Time')
    ax1.set_ylabel('angular vel', color='g')  # left y-axis label
    ax2.set_ylabel('signal')  # right y-axis label
    # sheet_name is a pandas Excel argument, not a savefig one, so it is no
    # longer passed here.
    fig.savefig(r'C:\Users\15643\Desktop\analysis\Export Files\4_fin\{}.png'.format(i))
    fig.show()
以下为最终结果