import pandas as pd
# Absolute path (F: drive) to the catering sales workbook from the book's
# chapter 3 demo data.
catering_sale = 'F:/python/python数据分析与挖掘实战/图书配套数据、代码/chapter3/demo/data/catering_sale.xls'

# Load the workbook, using the '日期' (date) column as the row index.
data = pd.read_excel(catering_sale, index_col=u'日期')

# Quick overview of the frame, one item per line of output.
print(
    data.head(),      # first rows
    data.tail(),      # last rows
    len(data),        # number of rows
    data.describe(),  # summary statistics
    data.index,       # row index
    data.columns,     # column labels
    sep='\n',
)

# Optional exploration snippets, kept disabled.
# Unique values
#print(data['销量'].is_unique)
#print(data['销量'].unique())
#print(len(data['销量'].unique()))
#print(data['销量'].value_counts())
# Missing values
#print(data.isnull())
#print(data.dropna())
#print(data.fillna(0))
# Duplicate values
#print(data.duplicated())
#print(data.drop_duplicates())
# Sorting
#print(data.sort_values(by='销量'))
import matplotlib.pyplot as plt
# Draw a box plot of `data` and label every outlier ("flier") with its value.
plt.rcParams['font.sans-serif'] = ['SimHei']  # so Chinese labels render correctly
plt.rcParams['axes.unicode_minus'] = False  # so the minus sign renders correctly

plt.figure()
# return_type='dict' gives access to the plotted artists by name ('fliers', ...).
p = data.boxplot(return_type='dict')

# Coordinates of the outlier markers belonging to the first box.
x = p['fliers'][0].get_xdata()
y = p['fliers'][0].get_ydata()
# Only y is sorted, so labels are placed in ascending value order.
# NOTE(review): presumably every flier of one box shares the same x position,
# so the (x, y) pairs stay valid after sorting y alone -- confirm.
y.sort()

for i, (xi, yi) in enumerate(zip(x, y)):
    # Distance to the previous (smaller) outlier; there is none for the first.
    gap = yi - y[i - 1] if i > 0 else 0
    if gap:
        # The closer two consecutive outliers are, the further left the label
        # is pushed (presumably to keep neighbouring labels from overlapping).
        label_x = xi + 0.05 - 0.8 / gap
    else:
        # First outlier, or a duplicate of the previous value: use the fixed
        # offset instead of dividing by zero.
        label_x = xi + 0.08
    plt.annotate(yi, xy=(xi, yi), xytext=(label_x, yi))

plt.show()

# Simpler alternative to the annotate loop above: write each value directly
# at its marker position.
# for xx, yy in zip(x, y):
#     plt.text(xx, yy, str(yy))
In [129]: data.head(10)
Out[129]:
              销量
日期
2015-03-01    51.0
2015-02-28  2618.2
2015-02-27  2608.4
2015-02-26  2651.9
2015-02-25  3442.1
2015-02-24  3393.1
2015-02-23  3136.6
2015-02-22  3744.1
2015-02-21  6607.4
2015-02-20  4060.3
箱型图判断异常值
最新推荐文章于 2024-06-21 19:24:42 发布