用tsfresh实现摔倒动作的分类(生成可供tsfresh使用的dataframe,tsfresh特征提取,sklearn决策树)

原始数据

原始数据是由传感器测到的俯仰角pitch angle和翻滚角roll angle随时间变化的一系列值,其具体形式如下所示:

-0.713761	1.336091	
-0.824189	1.509388	
-1.021816	1.695946	
-1.103965	1.928401	
-1.116714	2.074347	
-1.125247	1.908869	
-1.143308	1.131653	
-1.469824	0.384128	
-1.767419	-0.040086	
-2.048012	-0.543170	
-2.096062	-1.089851	
...        

由于数据点太密集,所以先对数据进行了采样,用列表存储这些数据,并根据文件名对每组数据标注了标签,最后使用pandas中的DataFrame()生成了表格数据,用Series()生成了id和标签之间的对应关系,具体代码如下所示:

import glob
import pandas as pd
# Class label for every recording whose file name is known. The keys are the
# exact names glob yields for the default '*.txt' pattern (no directory part).
_LABEL_BY_FILE = {
    name: label
    for label, names in (
        # names containing 正常 ("normal")
        (0, ('1min正常1.txt', '2min正常.txt', '40s正常1.txt', '40s正常2.txt',
             '40s正常3.txt', '40s正常4.txt', '40s正常5.txt')),
        # names starting with 后仰 ("backward") or 滑倒 ("slip")
        (1, ('后仰伏地约3s.txt', '后仰瘫倒约4s.txt', '滑倒瘫地3s.txt')),
        # names starting with 前倒 ("fall forward")
        (2, ('前倒单臂撑地2s.txt', '前倒伏地约2s.txt', '前倒跪地约2s.txt')),
        # names starting with 右 ("right")
        (3, ('右倒伏地约2s.txt', '右倒伏地约4s.txt', '右后倒伏地3s.txt',
             '右前倒伏地2s.txt', '右前倒伏地约4s.txt')),
    )
    for name in names
}
# Label given to every file that is not listed above.
_DEFAULT_LABEL = 4
# (start offset, step) of the ten sub-series cut from each recording, in id
# order: offsets 0..4, each taken with a step of 10 and then a step of 5.
_SAMPLING_PLAN = tuple(
    (offset, step) for offset in range(5) for step in (10, 5)
)


def Sample(pattern='*.txt', max_points=20):
    """Build the tsfresh input table and the id -> label mapping.

    Every file matching ``pattern`` in the current working directory is read
    as whitespace-separated rows of two floats. Ten down-sampled sub-series
    are cut from each file (see ``_SAMPLING_PLAN``), each truncated to at
    most ``max_points`` rows. The k-th sub-series (k = 1..10) of the n-th
    file (n = 0, 1, ...) gets the id ``n * 10 + k``.

    Files are processed in sorted name order so that ids are reproducible;
    ``glob.glob`` itself returns names in a filesystem-dependent order.
    Blank lines are ignored. A sub-series that ends up empty contributes
    neither rows nor a label.

    Args:
        pattern: glob pattern selecting the recordings. Labels are looked up
            by the path exactly as glob returns it, so with a pattern that
            includes a directory every file falls back to label 4.
        max_points: maximum number of rows kept per sub-series.

    Returns:
        A tuple ``(df, y)``: ``df`` is a DataFrame with the columns
        ``['id', 'time', 'F1', 'F2']`` where ``time`` counts rows from 0
        within each id; ``y`` is a Series mapping each id to its class label.

    Raises:
        ValueError: if a sampled line contains a token that is not a float.
    """
    df_rows = []
    id_to_target = {}
    for file_no, path in enumerate(sorted(glob.glob(pattern))):
        label = _LABEL_BY_FILE.get(path, _DEFAULT_LABEL)
        with open(path) as f:
            # Drop blank lines: they would otherwise become rows without
            # sensor values (NaN in the resulting DataFrame).
            raw_lines = [line for line in f if line.strip()]
        for slot, (offset, step) in enumerate(_SAMPLING_PLAN, start=1):
            picked = raw_lines[offset::step][:max_points]
            if not picked:
                continue
            series_id = file_no * 10 + slot
            id_to_target[series_id] = label
            # Lines are parsed only once they are sampled, so lines that are
            # never selected are never validated.
            for tick, line in enumerate(picked):
                df_rows.append([series_id, tick] + list(map(float, line.split())))
    # Tabular form for tsfresh plus the id -> label mapping.
    df = pd.DataFrame(df_rows, columns=['id', 'time', 'F1', 'F2'])
    y = pd.Series(id_to_target)
    return df, y


这是一张向后倒的数据图:
在这里插入图片描述

在生成好数据之后,就可以使用tsfresh强大的特征提取功能了。在执行过程中,先将数据可视化,看看数据大概的变化趋势;之后利用tsfresh的ComprehensiveFCParameters()和extract_features()提取特征,并利用extract_relevant_features()提取与分类相关的特征;然后利用train_test_split(位于sklearn.model_selection,scikit-learn 0.18之前的旧版本位于sklearn.cross_validation)将数据分为训练集和测试集,最后利用sklearn的DecisionTreeClassifier()作为模型进行训练。
DecisionTreeClassifier()是决策树,可以通过graphviz观察决策树,不过事先要安装好graphviz(安装地址见graphviz官网,安装过程中一直next就好啦),并注意要将安装好之后的graphviz路径下的bin目录添加到环境变量中;此外,代码中的import graphviz还需要先用pip install graphviz安装对应的Python包。
之后可以利用classification_report()查看每一类的精确度,召回率,F1值或者利用print(cl.predict(X_test)) print(y_test)查看预测值和标签值
下面附上实现代码:

import graphviz
import matplotlib.pylab as plt
import seaborn as sns
from sklearn import tree
from sklearn.metrics import classification_report
from sklearn.tree import DecisionTreeClassifier
from tsfresh import extract_features, extract_relevant_features, select_features
from tsfresh.feature_extraction.settings import ComprehensiveFCParameters
from tsfresh.utilities.dataframe_functions import impute

try:
    from sklearn.model_selection import train_test_split
except ImportError:  # scikit-learn < 0.18 only ships the old module
    from sklearn.cross_validation import train_test_split

# End-to-end experiment: load the sampled series, plot one example per class,
# extract tsfresh features and compare a decision tree trained on all
# features with one trained on the relevance-filtered features.
df, y = Sample()

print(df.head())

# Visualise the data.
# Sample() names the two sensor channels 'F1' and 'F2'. They are renamed on a
# copy used for plotting only, so the tsfresh feature names below still derive
# from 'F1'/'F2'.
# NOTE(review): assumes the first column is pitch and the second roll, which
# is the order in which the two angles are introduced in the text - confirm.
plot_df = df.rename(columns={'F1': 'pitch_angle', 'F2': 'roll_angle'})
# The ids below depend on the order in which Sample() enumerates the files.
for series_id, title in (
    (1, 'normal(id 1)'),
    (100, 'fall forward(id 100)'),
    (130, 'right down(id 130)'),
    (210, 'left down(id 210)'),
    (230, 'back down(id 230)'),
):
    one_series = plot_df[plot_df.id == series_id]
    one_series[['time', 'pitch_angle', 'roll_angle']].plot(
        x='time', title=title, figsize=(12, 6))
plt.show()

# Feature extraction, aggregated per id.
extract_settings = ComprehensiveFCParameters()
X = extract_features(df, column_id='id', column_sort='time',
                     default_fc_parameters=extract_settings,
                     impute_function=impute)
# Extract only the features that are relevant for predicting y.
# NOTE(review): relevance is computed on all samples, including those that end
# up in the test split below, so the second report may be optimistic.
X_filtered = extract_relevant_features(df, y, column_id='id', column_sort='time',
                                       default_fc_parameters=extract_settings)
X_filtered.info()
# Split into train and test sets; the test set is 40% of the samples.
# No random_state is passed, so the split differs from run to run.
X_train, X_test, X_filtered_train, X_filtered_test, y_train, y_test = train_test_split(
    X, X_filtered, y, test_size=0.4)

# Decision tree trained on the full feature set.
cl = DecisionTreeClassifier()
cl = cl.fit(X_train, y_train)
# Visualise the tree. A bare `graph` expression is only rendered when it is
# the last expression of a notebook cell; it has no effect in a plain script.
# tree.plot_tree(cl) is an alternative that does not need graphviz.
doc_data = tree.export_graphviz(cl, out_file=None)
graph = graphviz.Source(doc_data)
graph
predicted = cl.predict(X_test)
print(classification_report(y_test, predicted))
print(predicted)
print(y_test)
# Number of features the tree was fitted on. Taken from the training matrix
# because the estimator attribute holding it was renamed across scikit-learn
# releases.
print(X_train.shape[1])

# Decision tree trained on the relevance-filtered feature set.
cl2 = DecisionTreeClassifier()
cl2 = cl2.fit(X_filtered_train, y_train)
print(classification_report(y_test, cl2.predict(X_filtered_test)))

来看看两次测试集的结果,效果还不错,特征多的时候比特征少的时候效果好。
在这里插入图片描述

最后是训练集的决策树模型
在这里插入图片描述
时间序列不仅可以进行房价预测,股票预测,还可以进行分类(tsfresh用起来),就像这个例子中的老人摔倒姿势判断一样,大家可以尽情发挥,希望可以帮助到大家!

评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值