数据处理之Pandas

  • 一:CSV文档制作格式

  • Station_Name,Province
    庐山,江西省  
    庐山,江西省   
    庐山,江西省 

    二:CSV读取+画图

  • # -*- coding: utf-8 -*-
    """
    Created on Thu Aug  1 11:58:47 2019
    function:csv_to_png temp&ratio
    @author: WU ZEPEI
    """
    import numpy as np
    import matplotlib as mpl
    import matplotlib.pyplot as plt
    from pylab import *   #显示中文
    from matplotlib.pyplot import MultipleLocator
    import pandas as pd
    
    # Font setup: SimHei lets the Chinese legend label render, and turning off
    # unicode_minus keeps the minus sign displayable with that font.
    mpl.rcParams['axes.unicode_minus'] = False
    mpl.rcParams['font.sans-serif'] = ['SimHei']

    # Load the CSV and keep the first six rows of the two plotted columns.
    table = pd.read_csv(r'C:\Users\Administrator\Desktop\test17.9.2.csv')
    first_six = slice(0, 6)
    x_values = table['percent'][first_six]
    y_values = table['numbers'][first_six]

    # Draw the series; the label is what the legend shows.
    plt.plot(x_values, y_values, color='green', label='线状')
    # Further series can be drawn the same way with their own data, e.g.:
    #plt.plot(a,b,color='red', label='霰状')
    #plt.plot(a,b,color='skyblue', label='六角状')
    #plt.plot(a,b,color='cyan', label='柱状')
    #plt.plot(a,b,color='yellow', label='混合状')
    #plt.plot(a,b, color='gray', label='枝状')

    # Axis titles, figure title and legend.
    plt.xlabel('Ratio')
    plt.ylabel('Temp(℃)')
    plt.title('Result Analysis')
    plt.legend()
    # Limits are given high-to-low, so the y-axis runs from 12 down to -40.
    plt.ylim(12, -40)
    plt.show()

    三:读取txt 以空格为间隔

  • # -*- coding: utf-8 -*-
    """
    Created on Tue Oct 22 17:05:28 2019
    csv table
    @author: Administrator
    """
    import pandas as pd
    # Read a whitespace-delimited text file into a DataFrame.
    #   encoding='utf-8' : decode the file as UTF-8
    #   sep=r'\s+'       : split fields on runs of whitespace (replaces the
    #                      deprecated delim_whitespace=True; same behaviour)
    #   index_col=0      : use the first column as the row index
    # The first line of the file is taken as the column names; pass
    # header=None instead if the file has no header row and you want to
    # name the columns yourself.
    data = pd.read_table(
        'C:/Users/Administrator/Desktop/20180101-20180131.txt',
        encoding='utf-8',
        sep=r'\s+',
        index_col=0,
    )
    # Select one column by name and show it.
    a = data['Province']
    print(a)
    # A column can be sliced like any Series, e.g. a[0:6].
    

    四:设置数据输出CSV

  • # -*- coding: utf-8 -*-
    """
    Created on Thu Jul 25 09:05:06 2019
    function:test to csv
    @author: Wuzepei
    """
    # Count how many entries of `yfit` equal each class id 0..6, then write the
    # counts and each class's share of the counted total to a CSV file.
    # `yfit` must already be defined before this snippet runs, e.g.:
    #yfit = model.predict(test_data)
    import pandas as pd

    n_classes = 7
    counts = [0] * n_classes
    for label in yfit:
        for class_id in range(n_classes):
            if label == class_id:
                counts[class_id] += 1
                break

    # Only labels in 0..6 are counted, so the total can be smaller than len(yfit).
    total = sum(counts)
    # Guard the division so an empty or all-unmatched `yfit` yields zeros
    # instead of raising ZeroDivisionError.
    L = [[count, count / total if total else 0.0] for count in counts]
    # Final row: grand total and its share (1.0 whenever anything was counted).
    L.append([total, 1.0 if total else 0.0])

    name = ['numbers', 'percent']
    # Blank index labels: the first CSV column is written but left empty.
    test = pd.DataFrame(columns=name, data=L, index=[''] * len(L))
    # NOTE(review): the original note here reads like "put the date in the file name" - confirm.
    test.to_csv('C:/Users/Administrator/Desktop/数据/据/test0.csv')
    # -*- coding: utf-8 -*-
    """
    Created on Fri Sep 13 16:49:32 2019
    
    @author: Administrator
    """
    from PIL import Image
    import matplotlib.pyplot as plt
    import os,sys
    from keras.preprocessing.image import ImageDataGenerator, array_to_img, img_to_array, load_img
    from keras.utils import np_utils
    from PIL import Image
    from keras.preprocessing.image import ImageDataGenerator, array_to_img, img_to_array, load_img
    from array import array
    import numpy as np
    from sklearn.utils import shuffle
    import pickle
    import pandas as pd
    # List every entry of the test-image folder and save the names to a CSV.
    image_dir = r'F:\CFF竞赛\天气识别\Test'  # folder holding the source images
    file_names = os.listdir(image_dir)
    print(file_names)

    # Single-column table; index=False leaves the row-number column out of the file.
    names_table = pd.DataFrame(data=file_names, columns=['FileName'])
    names_table.to_csv(r'F:\CFF竞赛\天气识别\res.csv', index=False)

    五:多数据画图

  • import numpy as np
    import matplotlib as mpl
    import matplotlib.pyplot as plt
    import pandas as pd
    from pylab import *   #显示中文
    from matplotlib.pyplot import MultipleLocator
    # Font setup so the Chinese legend labels and the minus sign render.
    mpl.rcParams['font.sans-serif'] = ['SimHei']
    mpl.rcParams['axes.unicode_minus'] = False

    # Put your own data here; every x-list must be as long as `temps`.
    # Shared y-values (temperature) for all three series. The original repeated
    # this list three times and wrote `--14`, which Python evaluates to +14;
    # the intended value in the sequence is -14.
    # NOTE(review): the jump from -38 to -28 is kept as in the source - confirm
    # it is a real gap in the data rather than a typo.
    temps = [-42, -40, -38, -28, -26, -24, -22, -20, -18, -16, -14, -12, -10,
             -8, -6, -4, -2, 0, 2, 4, 6, 8, 10, 12, 14]

    # (x-values, marker, colour, legend label) for each plotted series.
    series = [
        ([0.008, 0.02, 0.00, 0.025, 0.19, 0.12, 0.21, 0.10, 0.13, 0.12, 0.04,
          0.04, 0.11, 0.07, 0.065, 0.06, 0.04, 0.09, 0.16, 0.03, 0.27, 0.07,
          0.16, 0.02, 0.32],
         'o', 'green', '线状'),
        ([0.10, 0.16, 0.25, 0.19, 0.48, 0.51, 0.76, 0.53, 0.74, 0.75, 0.65,
          0.80, 0.78, 0.74, 0.71, 0.60, 0.77, 0.83, 0.79, 0.79, 0.69, 0.85,
          0.80, 0.94, 0.64],
         'D', 'red', '霰状'),
        ([0.11, 0.08, 0.17, 0.21, 0.06, 0.07, 0.001, 0.051, 0.18, 0.019,
          0.044, 0.014, 0.011, 0.02, 0.01, 0.04, 0.02, 0.01, 0.005, 0, 0.006,
          0.009, 0.007, 0.001, 0.004],
         'H', 'skyblue', '六角状'),
    ]

    plt.title('Result Analysis')
    for ratios, marker, color, label in series:
        plt.plot(ratios, temps, marker=marker, color=color, label=label)

    # Limits are given high-to-low, so the y-axis runs from 18 down to -45.
    plt.ylim((18, -45))
    # Optional: force a major tick every 2 units on the y-axis.
    #y_major_locator=MultipleLocator(2)
    #ax=plt.gca()
    #ax.yaxis.set_major_locator(y_major_locator)
    plt.legend()  # show the legend built from the series labels

    plt.xlabel('Ratio')
    plt.ylabel('Temp(℃)')
    plt.show()

    六、利用os读取处理图像

  • row,column = 200,200  #图片大小47 62
    # Relies on names defined outside this snippet: `os`, `np`, PIL's `Image`,
    # and the target size `row`, `column`.

    # Step 1: resize every image in the folder and overwrite it in place.
    pathTrainDataBomei = r'F:\CFF竞赛\天气识别\train_9\筛选end\train_set\GRAY'
    listingDog = os.listdir(pathTrainDataBomei)  # entries of the image folder
    print(listingDog)
    for file in listingDog:
        if file == '.DS_Store':  # skip the macOS folder-metadata file
            continue
        image_path = os.path.join(pathTrainDataBomei, file)
        # The context manager closes the source file handle once the resized
        # copy exists, so nothing is left open when the file is overwritten.
        with Image.open(image_path) as img:
            # PIL's resize takes (width, height); both are equal here.
            resizeImg = img.resize((row, column))
        resizeImg.save(image_path)

    # Step 2: load the resized images as float32 arrays.
    image_data = []
    pathResizedTrainDataBomei = r'F:\CFF竞赛\天气识别\train_9\筛选end\train_set\GRAY'
    listingDog = os.listdir(pathResizedTrainDataBomei)
    for file in listingDog:
        if file == '.DS_Store':
            continue
        with Image.open(os.path.join(pathResizedTrainDataBomei, file)) as img:
            # Convert while the file is still open so the pixel data is read.
            x = np.array(img)
        x = x.astype('float32')
        image_data.append(x)

     

  • 八:单柱状图

  • """
    Created on Thu Aug  8 15:31:20 2019
    function:单柱状图
    @author: wuzepei
    """
    #coding:utf-8
    import matplotlib.pyplot as plt
    
    # Font setup so Chinese text and the minus sign render correctly.
    plt.rcParams['font.sans-serif'] = ['simHei']
    plt.rcParams['axes.unicode_minus'] = False

    # Two bars: one value per method, drawn at x positions 1 and 2.
    times = [13.0, 6.29]
    x = [1, 2]
    #plt.xlabel("Plan")
    # Axis label typo fixed: the original read "tiem(s)".
    plt.ylabel("time(s)", fontsize=15)
    # Replace the numeric x ticks with the method names.
    plt.xticks(x, ["Original", "Morphological"], fontsize=15)
    plt.yticks(fontsize=15)
    plt.title('Comparison of results(Original&Morphological)', fontsize=15)
    plt.bar(x=x, height=times)
    plt.show()

    九:水平柱状图

  • # -*- coding: utf-8 -*-
    """
    Created on Thu Jul 25 08:15:52 2019
    Function:水平柱状图
    @author: WU ZEPEI
    """
    import matplotlib.pyplot as plt
    import numpy as np
    from pylab import *   #显示中文
    
    # Reset matplotlib's settings, then pick SimHei so Chinese text renders.
    # NOTE(review): `mpl` and `matplotlib` are not imported explicitly here;
    # presumably they come from `from pylab import *` - confirm.
    mpl.rcdefaults()
    matplotlib.rc_file_defaults()
    mpl.rcParams['font.sans-serif'] = ['SimHei']

    # Data: one (blank) category label and one value per bar.
    x_data = [''] * 9
    y_data = [84, 89, 78, 86, 89, 60, 94, 89, 97]
    #y_data = [94,100,100, 81, 97, 84, 97, 95,95]
    bar_width = 0.6
    bar_count = len(x_data)

    plt.figure(figsize=(8.5, 6))
    # Bars sit at y = 0, 1, 2, ...; `width` is the bar length (the value),
    # `height` is the bar thickness, `alpha` the opacity.
    plt.barh(y=range(bar_count), width=y_data, height=bar_width, alpha=1)
    # A second series could be drawn beside the first by offsetting its
    # positions with np.arange(bar_count) + bar_width.

    # Write each value next to the end of its bar; the first two arguments
    # of plt.text place the label horizontally and vertically.
    for bar_pos, value in enumerate(y_data):
        plt.text(value + 2, bar_pos - bar_width / 4, str(value),
                 ha='center', va='bottom', fontsize=15)

    # Replace the numeric y ticks with the category labels.
    tick_positions = np.arange(bar_count) + bar_width / 8
    plt.yticks(tick_positions, x_data, fontsize=15)

    plt.title("Classification of ice crystals (Original grayscale image results)",
              fontsize=15)
    plt.xlabel("prediction precision(%)", fontsize=15)
    plt.show()

    十、垂直柱状图

  • # -*- coding: utf-8 -*-
    """
    Created on Thu Aug  8 15:10:50 2019
    function:垂直柱状图
    @author: wuzepei
    """
    
    import matplotlib
    import matplotlib.pyplot as plt
    import numpy as np
    
    # Class ids: 0-linear, 1-graupel, 2-circle, 3-hexagonal, 4-columnar,
    # 5-mixed, 6-dendritic, 7-broken
    # NOTE(review): eight classes are listed but nine bars are plotted - confirm
    # what the ninth entry represents.
    labels = ['', '', '', '', '', '', '', '', '']
    Original = [84, 89, 78, 86, 89, 60, 94, 89, 97]
    Morphological = [94, 100, 100, 81, 97, 84, 97, 95, 95]

    x = np.arange(len(labels))  # the label locations
    width = 0.35  # the width of the bars

    fig, ax = plt.subplots()
    # Each group is two bars side by side, centred on its tick. The `label`
    # arguments are what ax.legend() displays; without them the legend is empty.
    rects1 = ax.bar(x - width/2, Original, width, label='Original')
    rects2 = ax.bar(x + width/2, Morphological, width, label='Morphological')

    # Axis label, title, custom x-axis tick labels and legend.
    ax.set_ylabel('precision(%)')
    ax.set_title('Comparison of results(Original&Morphological)')
    ax.set_xticks(x)
    ax.set_xticklabels(labels)
    ax.legend()


    def autolabel(rects):
        """Attach a text label above each bar in *rects*, displaying its height."""
        for rect in rects:
            height = rect.get_height()
            ax.annotate('{}'.format(height),
                        xy=(rect.get_x() + rect.get_width() / 2, height),
                        xytext=(0, 3),  # 3 points vertical offset
                        textcoords="offset points",
                        ha='center', va='bottom')


    autolabel(rects1)
    autolabel(rects2)

    fig.tight_layout()

    plt.show()

     

  • 0
    点赞
  • 0
    收藏
    觉得还不错? 一键收藏
  • 0
    评论
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值