from pandas import DataFrame, read_csv
import matplotlib.pyplot as plt
import pandas as pd
import sys, os
import matplotlib
##打印系统信息
#print(sys.version)
#print(pd.__version__)
#print(matplotlib.__version__)
##合并列
#names ='Bob Jessica Mary Mel'
#names = names.split()
#births = [968,155,77,578,973]
#BabyDataSet = list(zip(names, births))
#print(BabyDataSet)
#写入csv文件
#df = pd.DataFrame(data = BabyDataSet,columns = ['names','birthday'])
#print(df)
#df.to_csv('birthday.csv',index = True,header = True)
##读取文件
#reader = r'/home/martin/PycharmProjects/untitled/test.csv'
#df = pd.read_csv(reader,header = None)
#print(df)
##删除文件
#reader = r'/home/martin/PycharmProjects/untitled/birthday.csv'
#os.remove(reader)
#输出类型
#print(df.dtypes)
#输出最大的前几个
#Sorted = df.sort_values(['birthday'],ascending = False)
#print(Sorted.head(3))
#print(df['birthday'].max())
#画图
#df['birthday'].plot()
#MaxValue = df['birthday'].max()
#MaxName = df['names'][df['birthday'] == df['birthday'].max()]
#Text = str(MaxValue) + "," + MaxName
#plt.annotate(Text, xy=(1, MaxValue), xytext=(8, 0),
#xycoords=('axes fraction', 'data'), textcoords='offset points')
#
#print("hello")
#df[df['birthday'] == df['birthday'].max()]
#plt.show()
# Getting-started tutorial 2
import pandas as pd
import matplotlib.pyplot as plt

# Load the data set; the statements below read its 'Name' and 'Pclass' columns.
complaints = pd.read_csv('test.csv')
# print(complaints)

# First 5 entries
print(complaints['Name'][:5])
print(complaints[['Name', 'Pclass']][:5])

# Count how often each distinct name occurs.
complaint_counts = complaints['Name'].value_counts()
print(complaint_counts[:5])

# Plot the first five counts as a bar chart.
complaint_counts[:5].plot(kind='bar')
plt.show()
# Getting-started tutorial 3
# Read a CSV file:
def open_file(path):
    """Load a CSV file and return its contents.

    Args:
        path: Location of the CSV data; handed unchanged to ``pd.read_csv``.

    Returns:
        The object produced by ``pd.read_csv(path)`` with default options.
    """
    return pd.read_csv(path)
# The statements below are short, independent recipes. They operate on
# `data` and `df`, which are not defined in this part of the file.
# NOTE(review): presumably both are pandas DataFrames — confirm.

# Write a CSV file:
data.to_csv('hour2.csv')
print(data)

# Drop every column that contains at least one missing value:
data = data.dropna(axis=1, how='any')

# Modify a single cell of a DataFrame:
df.iat[1, 3] = 4  # .iat is position-based: row position 1, column position 3 is set to 4

# Drop the index:
# reset_index returns a new object instead of modifying df, so keep the result.
df = df.reset_index(drop=True)

# Set the index:
df = df.set_index('data1')

# Change the header (column names):
df.columns = ['data1', 'data2']

# Convert the type:
df['data1'] = pd.to_numeric(df['data1'])

# Replace the index:
df.index = pd.date_range(start='2017-08-28 9:30', end='2017-08-28 16:00', freq='1min')

# Find the missing rows:
# NOTE(review): 范围 is not defined in the visible code — presumably the
# expected number of rows; confirm and define it before running this.
# The resulting set is not stored; it is only visible in an interactive session.
set(range(范围)) - set(df.index)

# Fill in the missing rows (forward-fill from the previous existing row):
df = df.reindex(range(范围), method='ffill')

# Drop rows that contain NaN values:
# NOTE(review): the original line had no receiver; df is assumed — confirm.
df = df.dropna()