涉及内容:第一:利用pandas实现文件的读写(重点在于读取不连续的列,通过read_csv()的usecols参数实现)
第二:对时间进行格式化
第三:只提取时间的年和月,然后根据月分组并求和
第四:利用matplotlib画柱状图(展示对比)和折线图(展示趋势)
代码如下:
# encoding: utf-8
"""Plot per-month totals of Clinton vs. Trump raw poll numbers.

Steps performed by this script:
1. Read a non-contiguous set of columns from the polls CSV via the
   ``usecols`` parameter of ``pandas.read_csv``.
2. Parse ``enddate`` and reduce it to a ``YYYY-MM`` key.
3. Group the rows by that key and sum the poll columns.
4. Draw a line chart (trend) and a grouped bar chart (comparison) of the
   raw poll totals in a 2x2 figure; the two lower subplots are created
   but left empty.
"""
import datetime

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from mpl_toolkits.mplot3d import Axes3D

filePath = "/Users/apple/PycharmProjects/AmericanPrisedent/presidential_polls.csv"

# Columns that get summed per month.
poll_columns = ['rawpoll_clinton', 'adjpoll_clinton', 'rawpoll_trump', 'adjpoll_trump']

data = pd.read_csv(filePath, usecols=['enddate'] + poll_columns)
df = pd.DataFrame(data)

# Collapse each end date to a "YYYY-MM" key.  ``.dt.strftime`` yields NaN for
# unparseable/missing dates (which ``groupby`` then drops) instead of raising
# the way '%d' % NaT.year would.
df['enddate'] = pd.to_datetime(df['enddate'])
x = df['enddate'].dt.strftime('%Y-%m')
month_array = np.array(x)
df['enddate'] = x

# ``groupby`` sorts its keys, and "YYYY-MM" strings sort chronologically.
monthly = df.groupby('enddate', as_index=False).sum()
# ``DataFrame.ix`` was removed in pandas 1.0; ``.loc`` is the label-based
# replacement.
grouped = monthly.loc[:, poll_columns]

fig = plt.figure()
ax1 = fig.add_subplot(2, 2, 1)
ax2 = fig.add_subplot(2, 2, 2)
ax3 = fig.add_subplot(2, 2, 3)
ax4 = fig.add_subplot(2, 2, 4)

# Line chart: trend of the monthly raw poll totals.
ax1.plot(grouped['rawpoll_clinton'], color='r')
ax1.plot(grouped['rawpoll_trump'], color='g')

# Grouped bar chart: side-by-side comparison per month.
width = 0.25
# One bar slot per month actually present in the data (not a fixed 12).
x = np.arange(len(grouped))
ax2.bar(x, grouped['rawpoll_clinton'], width, color='r')
ax2.bar(x + width, grouped['rawpoll_trump'], width, color='g')
# Bars are centred on x and x + width, so the pair's midpoint is x + width / 2.
ax2.set_xticks(x + width / 2)
# Label ticks with the real "YYYY-MM" keys rather than a hard-coded 1..12.
ax2.set_xticklabels(monthly['enddate'], rotation='vertical')

plt.show()