Task02学习心得

数据分析及可视化

导入相关库

import warnings
warnings.filterwarnings("ignore")
import os 
import multiprocessing as mp
from tqdm import tqdm
import read_all_data
import pickle
import pandas as pd
import random
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

定义相关函数

# Helper class for loading and saving pickled data
class Load_Save_Data():
    """Load and save Python objects as pickle files.

    The instance remembers the most recently used file path in
    ``self.filename`` so later calls may omit the path.
    """

    def __init__(self, file_name=None):
        # ``name`` is kept for backward compatibility; ``filename`` is the
        # attribute that load_data/save_data actually read.
        self.name = file_name
        self.filename = file_name

    def _resolve(self, path):
        """Return the path to use, remembering an explicitly given one."""
        if path is None:
            if self.filename is None:
                raise ValueError("Invalid path...")
        else:
            self.filename = path
        return self.filename

    def load_data(self, Path=None):
        """Unpickle and return the object stored at ``Path``.

        Args:
            Path: file to read; when None the remembered path is used.

        Raises:
            ValueError: if no path was given and none is remembered.
        """
        # NOTE: pickle.load executes arbitrary code from the file; only use
        # this on files this project wrote itself.
        with open(self._resolve(Path), "rb") as f:
            data = pickle.load(f)
        return data

    def save_data(self, data, path=None):
        """Pickle ``data`` to ``path``.

        Args:
            data: any picklable object.
            path: destination file; when None the remembered path is used.

        Raises:
            ValueError: if no path was given and none is remembered.
        """
        with open(self._resolve(path), "wb") as f:
            pickle.dump(data, f)
# Read every data file in a directory in parallel
def read_data(Path, kind=""):
    """Read all files listed under ``Path`` with a process pool.

    Args:
        Path: directory whose entries are handed to the reader.
        kind: ``"train"`` selects ``read_all_data.read_train_file``; any
            other value selects ``read_all_data.read_test_file``.

    Returns:
        A list with one reader result per directory entry, in the order
        returned by ``os.listdir``. For ``kind == "train"`` the list is also
        pickled to ``./data_tmp/total_data.pkl``.
    """
    # NOTE(review): bare file names (not joined with Path) are passed to the
    # reader; presumably read_all_data resolves the directory itself - confirm.
    filenames = os.listdir(Path)
    print("\n@Read data from " + Path + "................")
    reader = (read_all_data.read_train_file if kind == "train"
              else read_all_data.read_test_file)
    with mp.Pool(processes=mp.cpu_count()) as pool:
        # imap yields results lazily and in input order, so the progress bar
        # advances as files finish; pool.map would only return after all
        # files are done, leaving the bar with nothing to show.
        data_total = list(tqdm(pool.imap(reader, filenames), total=len(filenames)))
    print("\n@End Read total Data................")
    load_save = Load_Save_Data()
    if kind == 'train':
        # Make sure the cache directory exists before writing into it
        os.makedirs("./data_tmp", exist_ok=True)
        load_save.save_data(data_total, "./data_tmp/total_data.pkl")
    return data_total
# Split the cached data by fishing-gear type
def get_diff_data():
    """Split the cached training data into one pickle file per gear type.

    Reads ``./data_tmp/total_data.pkl`` and, for each of the three labels,
    keeps the entries whose first unique ``"type"`` value equals that label.
    Each subset is written under ``./data_tmp/`` via Load_Save_Data.save_data.
    """
    with open("./data_tmp/total_data.pkl", "rb") as handle:
        everything = pickle.load(handle)

    saver = Load_Save_Data()

    # (label stored in the "type" column, output file name)
    targets = (
        ("刺网", "ciwang_data.pkl"),
        ("围网", "weiwang_data.pkl"),
        ("拖网", "tuowang_data.pkl"),
    )
    for label, out_name in targets:
        subset = []
        for frame in everything:
            if frame["type"].unique()[0] == label:
                subset.append(frame)
        saver.save_data(subset, "./data_tmp/" + out_name)
# Pick one trajectory at random (reproducibly) from a cached pickle file
def get_random_one_traj(type=None):
    """Return one pseudo-randomly chosen element of ``./data_tmp/<type>.pkl``.

    Args:
        type: base name (without ``.pkl``) of the file under ``./data_tmp/``.

    Returns:
        The element at the chosen index of the unpickled sequence. The same
        element is returned on every call for a given file length because
        the generator is reseeded each time.
    """
    random.seed(10)
    path = "./data_tmp/"
    with open(path + type + ".pkl", "rb") as f1:
        data = pickle.load(f1)
    length = len(data)
    # Draw from the stdlib generator that was just seeded; drawing from
    # numpy's generator would ignore random.seed and not be reproducible.
    index = random.randrange(length)
    return data[index]
# Draw three trajectories from a cached pickle file
def get_random_three_traj(type=None):
    """Return three distinct elements of ``./data_tmp/<type>.pkl``.

    The stdlib generator is reseeded with 10 on every call, so the same
    three positions are chosen for a given file length.

    Args:
        type: base name (without ``.pkl``) of the file under ``./data_tmp/``.

    Returns:
        A 3-tuple of the elements at the sampled positions.
    """
    random.seed(10)
    pkl_file = "./data_tmp/" + type + ".pkl"
    with open(pkl_file, "rb") as handle:
        trajectories = pickle.load(handle)
    positions = list(range(len(trajectories)))
    first, second, third = random.sample(positions, 3)
    return trajectories[first], trajectories[second], trajectories[third]
# Plot three sampled trajectories for each of the three gear types
def visualize_three_traj():
    """Draw a 3x3 grid of trajectories: one row per gear type.

    Row ``i`` shows the three trajectories returned by
    ``get_random_three_traj`` for the i-th file; column ``j`` shows the j-th
    of them. Each panel marks the first point in red and the last in green.
    """
    fig, axes = plt.subplots(nrows=3, ncols=3, figsize=(20, 15))
    plt.subplots_adjust(wspace=0.2, hspace=0.2)

    labels = ["ciwang", "weiwang", "tuowang"]
    file_types = ["ciwang_data", "weiwang_data", "tuowang_data"]
    # Wrap the list (not the enumerate object) so tqdm knows the total
    for i, file_type in enumerate(tqdm(file_types)):
        trajectories = get_random_three_traj(type=file_type)
        for j, datax in enumerate(trajectories):
            # NOTE(review): .loc[-1:] is a label slice; it presumably selects
            # every row when the index is an increasing 0..n-1 range - confirm.
            x_data = datax["x"].loc[-1:].values
            y_data = datax["y"].loc[-1:].values
            # Use column j directly; j - 1 would send the first trajectory
            # to the last column via negative indexing.
            ax = axes[i][j]
            ax.scatter(x_data[0], y_data[0], label="start", c="red", s=10,
                       marker="8")
            ax.plot(x_data, y_data, label=labels[i])
            ax.scatter(x_data[-1], y_data[-1],
                       label="end", c="green", s=10, marker="v")
            # alpha must lie in [0, 1]; 1 keeps the grid fully opaque
            ax.grid(alpha=1)
            ax.legend(loc="best")

读取数据

# Read every training file in parallel (read_data also pickles the resulting
# list to ./data_tmp/total_data.pkl when kind is "train"), then concatenate
# the per-file results into a single object with pd.concat.
train_path = r"D:\anaconda\Proj\wisdomOcean\组队学习文件\hy_round1_train_20200102"
data_train = read_data(train_path,"train")
data_train = pd.concat(data_train)

# Same for the test set; no pickle cache is written for kind "test".
test_path = r"D:\anaconda\Proj\wisdomOcean\组队学习文件\hy_round1_testA_20200102"
data_test = read_data(test_path,kind='test')
data_test = pd.concat(data_test)

读取数据进度

了解数据

# Notebook cell: show the shape of the concatenated test data.
data_test.shape

形状

# Notebook cell: list the column names of the test data.
data_test.columns

属性

# Notebook cell: preview the first rows of the test data.
data_test.head()

测试数据

# Notebook cell: show the shape of the concatenated training data.
data_train.shape

形状

# Notebook cell: list the column names of the training data.
data_train.columns

属性

# Notebook cell: summary statistics of the training data, reported at the
# listed percentiles (1%, 2.5%, 5%, 50%, 75%, 90%, 99%).
data_train.describe([0.01,0.025,0.05,0.5,0.75,0.9,0.99])

概要

处理数据

# Split the cached training data into one pickle file per gear type;
# this reads ./data_tmp/total_data.pkl, so the training read must run first.
get_diff_data()

可视化

# Draw the 3x3 trajectory grid from the per-type pickle files.
visualize_three_traj()

在这里插入图片描述

轨迹可视化

坐标序列可视化

def visualize_one_traj_x_y():
    """Plot the x and y coordinate sequences of one sampled trajectory.

    One trajectory is taken from the ``weiwang_data`` file; its ``"x"`` and
    ``"y"`` columns are divided by 10000 and plotted against their position
    in the sequence, x in the top panel and y in the bottom panel.
    """
    fig, axes = plt.subplots(nrows=2, ncols=1, figsize=(10, 8))
    plt.subplots_adjust(wspace=0.5, hspace=0.5)

    data1 = get_random_one_traj(type="weiwang_data")
    # NOTE(review): .loc[-1:] is a label slice; it presumably selects every
    # row when the index is an increasing 0..n-1 range - confirm.
    x = data1["x"].loc[-1:] / 10000
    y = data1["y"].loc[-1:] / 10000

    axes[0].plot(np.arange(len(x)), x, label="x")
    axes[1].plot(np.arange(len(y)), y, label="y")
    for ax in axes:
        # alpha must lie in [0, 1]; 1 keeps the grid fully opaque
        ax.grid(alpha=1)
        ax.legend(loc="best")
visualize_one_traj_x_y()

在这里插入图片描述

速度方向可视化

def visualize_three_traj_speed_direction():
    """Plot speed and direction sequences for one trajectory per gear type.

    Builds a 3x2 grid: row ``i`` uses one trajectory sampled from the i-th
    file, with the ``"速度"`` column in the left panel and the ``"方向"``
    column in the right panel, each plotted against sequence position.
    """
    fig, axes = plt.subplots(nrows=3, ncols=2, figsize=(20, 15))
    plt.subplots_adjust(wspace=0.3, hspace=0.3)

    file_types = ["ciwang_data", "weiwang_data", "tuowang_data"]
    speed_types = ["ciwang_speed", "weiwang_speed", "tuowang_speed"]
    directions = ["ciwang_direction", "weiwang_direction", "tuowang_direction"]
    colors = ['pink', 'lightblue', 'lightgreen']
    # Wrap the list (not the enumerate object) so tqdm knows the total
    for i, file_name in enumerate(tqdm(file_types)):
        datax = get_random_one_traj(type=file_name)
        # NOTE(review): .loc[-1:] is a label slice; it presumably selects
        # every row when the index is an increasing 0..n-1 range - confirm.
        speed = datax["速度"].loc[-1:].values
        heading = datax["方向"].loc[-1:].values
        panels = (
            (axes[i][0], speed, speed_types[i]),
            (axes[i][1], heading, directions[i]),
        )
        for ax, series, label in panels:
            ax.plot(range(len(series)), series, label=label, color=colors[i])
            # alpha must lie in [0, 1]; 1 keeps the grid fully opaque
            ax.grid(alpha=1)
            ax.legend(loc="best")

在这里插入图片描述

速度方向数据分布

def get_data_cummulation(type=None,path=None,kind=None,columns=None):
    """Count how often each value of one column occurs across a pickle file.

    Args:
        type: base name (without ``.pkl``) of the file to read.
        path: directory prefix that is concatenated in front of ``type``.
        kind: column whose values are counted in every loaded element.
        columns: name given to the single count column of the result.

    Returns:
        A DataFrame indexed by the distinct values in ascending order, with
        one column named ``columns`` holding the occurrence counts.
    """
    pkl_file = path + type + ".pkl"
    with open(pkl_file, "rb") as handle:
        frames = pickle.load(handle)

    counts = {}
    for frame in tqdm(frames):
        for value in frame[kind].values:
            counts[value] = counts.get(value, 0) + 1

    # Rebuild the dict in ascending key order so the result index is sorted
    ordered = {key: counts[key] for key in sorted(counts)}
    return pd.DataFrame.from_dict(ordered, orient="index", columns=[columns])
def get_speed_and_direction_distribution_data(type=None):
    """Return the value-count tables for speed and direction of one file.

    Args:
        type: base name (without ``.pkl``) of the file under ``./data_tmp/``.

    Returns:
        ``(speed_df, direction_df)`` as produced by ``get_data_cummulation``
        for the ``"速度"`` and ``"方向"`` columns, with count columns named
        ``"speed"`` and ``"direction"`` respectively.
    """
    shared = {"type": type, "path": "./data_tmp/"}
    speed_df = get_data_cummulation(kind="速度", columns="speed", **shared)
    direction_df = get_data_cummulation(kind="方向", columns="direction", **shared)
    return speed_df, direction_df
# Module-level caches filled by plot_speed_direction1_distribution and read
# by plot_speed_direction2_distribution (one entry per gear type, in order).
df_speeds = []
df_directions = []


def plot_speed_direction1_distribution():
    """Plot KDE curves of the speed and direction count tables per gear type.

    For each of the three files the count tables are fetched with
    ``get_speed_and_direction_distribution_data``; the ``"speed"`` counts go
    on the left panel and the ``"direction"`` counts on the right panel,
    both divided by 1000000. The tables are also stored in the module-level
    ``df_speeds`` / ``df_directions`` lists.
    """
    plt.subplots(nrows=1, ncols=2, figsize=(15, 6))
    plt.subplots_adjust(wspace=0.3, hspace=0.5)

    file_types = ["ciwang_data", "weiwang_data", "tuowang_data"]
    labels = ["target==cw", "target==ww", "target==tw"]
    colors = ["red", "green", "blue"]

    # Start from empty caches so a re-run does not stack duplicate entries
    df_speeds.clear()
    df_directions.clear()

    for i, file_type in enumerate(file_types):
        df11, df21 = get_speed_and_direction_distribution_data(file_type)
        # The tables are built with columns "speed" / "direction"; indexing
        # them by the source column names "速度" / "方向" raises KeyError.
        # NOTE(review): newer seaborn releases replace shade= with fill= -
        # confirm against the installed version.
        plt.subplot(1,2,1)
        ax1 = sns.kdeplot(df11["speed"].values / 1000000, color=colors[i],shade=True)
        plt.subplot(1,2,2)
        ax3 = sns.kdeplot(df21["direction"].values / 1000000, color=colors[i],shade=True)
        df_speeds.append(df11)
        df_directions.append(df21)
    ax1.legend(labels)
    ax1.set_xlabel("速度")
    ax3.set_xlabel("方向")
    ax3.legend(labels)
plot_speed_direction1_distribution()

在这里插入图片描述

def plot_speed_direction2_distribution():
    """Draw box plots of the cached speed and direction count tables.

    Reads the first three entries of the module-level ``df_speeds`` and
    ``df_directions`` lists. The left panel shows the ``"speed"`` columns
    and the right panel the ``"direction"`` columns, labelled cw / ww / tw,
    with the boxes filled pink, light blue and light green.
    """
    fig, (speed_ax, direction_ax) = plt.subplots(nrows=1, ncols=2, figsize=(15, 6))
    plt.subplots_adjust(wspace=0.3, hspace=0.5)

    # Options shared by both box plots
    box_style = dict(vert=True, patch_artist=True, labels=["cw", "ww", "tw"])

    speed_values = [df_speeds[k]["speed"].values for k in range(3)]
    speed_boxes = speed_ax.boxplot(speed_values, **box_style)
    direction_values = [df_directions[k]["direction"].values for k in range(3)]
    direction_boxes = direction_ax.boxplot(direction_values, **box_style)

    fills = ['pink', 'lightblue', 'lightgreen']
    for boxes in (speed_boxes, direction_boxes):
        for patch, fill in zip(boxes["boxes"], fills):
            patch.set_facecolor(fill)

    speed_ax.set_title("速度")
    direction_ax.set_title("方向")
plot_speed_direction2_distribution()

在这里插入图片描述

  • 0
    点赞
  • 0
    收藏
    觉得还不错? 一键收藏
  • 1
    评论
评论 1
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值