import pandas as pd
# Machine-specific absolute path to the 2018 Beijing weather CSV.
fpath = "D:\\python39\\pandas\\antlearnpandasmaster\\datas\\beijing_tianqi\\beijing_tianqi_2018.csv"
bjtq = pd.read_csv(fpath)

# Use the date column as the index so rows can be selected by date label.
bjtq.set_index('ymd', inplace=True)
print(bjtq.head())
print(bjtq.index)

# Strip the '℃' suffix and convert both temperature columns to int32.
# Plain column assignment is used on purpose: on pandas >= 2.0,
# `frame.loc[:, col] = values` writes into the existing object-dtype column
# in place, so the column would keep dtype object instead of becoming int32.
for temp_col in ('bWendu', 'yWendu'):
    bjtq[temp_col] = (
        bjtq[temp_col].str.replace('℃', '', regex=False).astype('int32')
    )

# --- Queries with .loc ---
# Single row label, single column -> scalar.
print(bjtq.loc['2018-01-01', 'bWendu'])
# Single row label, list of columns -> Series.
print(bjtq.loc['2018-01-01', ['bWendu', 'yWendu']])
# List of row labels, list of columns -> DataFrame.
print(bjtq.loc[['2018-01-01', '2018-01-02', '2018-01-03'], ['bWendu', 'yWendu']])
# Label slices on both axes (.loc slices include both endpoints).
print(bjtq.loc['2018-01-01':'2018-01-03', 'bWendu':'aqiInfo'])
# Boolean mask: each comparison must be parenthesised because `&` binds
# tighter than the comparison operators.
# NOTE(review): the original also tested `bWendu > 15`, which is implied by
# `bWendu > 20` and was dropped; a different column or bound may have been
# intended -- confirm.
print(bjtq.loc[
    (bjtq['bWendu'] > 20)
    & (bjtq['tianqi'] == '晴')
    & (bjtq['aqiLevel'] == 1)
])
# Callable selector: .loc calls it with the frame and uses the returned mask.
print(bjtq.loc[lambda frame: (frame['bWendu'] <= 30) & (frame['yWendu'] >= 15), :])
#自定义函数,查询9月份,空气质量好的数据
def query_my_data(df):
    """Return a boolean mask for rows dated 2018-09 whose aqiLevel is 1.

    Args:
        df: DataFrame whose index holds date strings and which has an
            'aqiLevel' column.

    Returns:
        Boolean mask aligned with ``df``, usable as a ``df.loc`` row selector.
    """
    # The comparison must be parenthesised: `&` binds tighter than `==`, so
    # `a & b == 1` is evaluated as `(a & b) == 1`, a bitwise AND of the mask
    # with the integer levels, which wrongly matches odd levels such as 3.
    in_september = df.index.str.startswith("2018-09")
    return in_september & (df["aqiLevel"] == 1)
# Inline form of the September / aqiLevel == 1 selection.
september_rows = bjtq.index.str.startswith('2018-09')
level_one_rows = bjtq["aqiLevel"] == 1
a = bjtq.loc[september_rows & level_one_rows, :]

# Same kind of selection, with the mask produced by the helper function.
helper_mask = query_my_data(bjtq)
b = bjtq.loc[helper_mask, :]
print(b)
# Pandas使用df.loc查询数据的方法:
# 1.使用单个label值查询数据
# 2.使用值列表批量查询
# 3.使用数值区间进行范围查询
# 4.使用条件表达式查询
# 5.调用函数查询
import pandas as pd
# Machine-specific absolute path to the 2018 Beijing weather CSV.
fpath = "D:\\python39\\pandas\\antlearnpandasmaster\\datas\\beijing_tianqi\\beijing_tianqi_2018.csv"
df = pd.read_csv(fpath)

# Strip the '℃' suffix and convert both temperature columns to int32.
# Plain column assignment is used on purpose: on pandas >= 2.0,
# `df.loc[:, col] = values` writes into the existing object-dtype column in
# place, so the column would keep dtype object instead of becoming int32.
for temp_col in ('bWendu', 'yWendu'):
    df[temp_col] = df[temp_col].str.replace('℃', '', regex=False).astype('int32')

# Temperature difference between the two temperature columns.
df['wencha'] = df['bWendu'] - df['yWendu']
def get_wendu_type(x, high=33, low=-10):
    """Classify one row's temperatures as hot, cold, or normal.

    Args:
        x: Mapping-like row (for example the Series that
            ``df.apply(..., axis=1)`` passes in) with numeric 'bWendu' and
            'yWendu' entries.
        high: 'bWendu' values strictly above this are labelled "高温".
        low: 'yWendu' values strictly below this are labelled '低温'.

    Returns:
        "高温" when ``x['bWendu'] > high``; otherwise '低温' when
        ``x['yWendu'] < low``; otherwise "常温". The hot check wins when
        both conditions hold.
    """
    if x['bWendu'] > high:
        return "高温"
    if x['yWendu'] < low:
        return '低温'
    return "常温"
# Label every row by applying get_wendu_type row-wise (axis=1).
df["wendu_type"] = df.apply(get_wendu_type, axis=1)

# assign() returns a new frame with the extra columns; df itself is unchanged.
# The formula value * 9 / 5 + 32 is the Celsius -> Fahrenheit conversion.
bf = df.assign(
    ywendu_huashi=lambda frame: frame['yWendu'] * 9 / 5 + 32,
    bwendu_huashi=lambda frame: frame['bWendu'] * 9 / 5 + 32,
)
print(bf.head())

# Conditional assignment: start from an empty label, then fill by mask.
# The difference is computed once and reused for both masks.
temp_gap = df['bWendu'] - df['yWendu']
df['wencha_type'] = ''
df.loc[temp_gap > 10, 'wencha_type'] = "温差大"
df.loc[temp_gap <= 10, 'wencha_type'] = '温差正常'
print(df['wencha_type'].value_counts())

# Summary statistics of the numeric columns.
print(df.describe())
# Statistics of a single Series.
print(df['bWendu'].mean(), df['bWendu'].max(), df['bWendu'].min())

# Distinct values and per-value counts; these are meant for categorical
# columns rather than continuous numeric ones.
print(df['fengxiang'].unique(), df['tianqi'].unique(), df['fengli'].unique())
print(df['fengxiang'].value_counts(), df['tianqi'].value_counts(), df['fengli'].value_counts())

# Covariance and correlation matrices. The frame also holds string columns,
# on which DataFrame.cov()/corr() raise in pandas >= 2.0, so restrict the
# computation to the numeric columns explicitly.
numeric_df = df.select_dtypes(include='number')
print(numeric_df.cov())
print('====================================================')
print(numeric_df.corr())
print('====================================================')
# Pairwise correlation of aqi with each temperature column and with their
# difference.
print(df['aqi'].corr(df['bWendu']))
print(df['aqi'].corr(df['yWendu']))
print(df['aqi'].corr(df['bWendu'] - df['yWendu']))