# Count the rows of data_1_c per distinct '居住地' value; after .count() every
# remaining column (including '_id') holds the per-group non-null count.
df_city = data_1_c.groupby('居住地').count()

# Build the join key by dropping the last character of each '地区' value.
# NOTE(review): presumably this strips a trailing suffix so the names line up
# with the '居住地' values -- confirm against the actual data.
data_2['city'] = data_2['地区'].str.slice(stop=-1)

# Inner-join the per-city counts (indexed by '居住地') with data_2 on 'city'.
q1data = df_city.merge(data_2, left_index=True, right_on='city', how='inner')

# Density metric: per-city '_id' count divided by the '常住人口' column.
q1data['知友密度'] = q1data['_id'] / q1data['常住人口']
def data_normal(df, *col):
    """Min-max normalise the given columns of *df* onto a 0-100 scale.

    For every column name in *col* a new column named ``<name>_nor`` is
    written into *df* using ``(X - Xmin) / (Xmax - Xmin) * 100``.

    Args:
        df: DataFrame holding the columns to normalise. It is modified in
            place (the ``*_nor`` columns are added to it).
        *col: Names of the columns to normalise.

    Returns:
        A tuple ``(df, colnames)`` where ``df`` is the same object that was
        passed in and ``colnames`` lists the names of the added columns, in
        the order of *col*.
    """
    colnames = []
    for i in col:
        colname = i + '_nor'
        col_min = df[i].min()
        span = df[i].max() - col_min
        if span == 0:
            # Every value is identical: the formula would evaluate 0/0 and
            # fill the column with NaN, so pin the result to 0 instead.
            df[colname] = 0.0
        else:
            df[colname] = (df[i] - col_min) / span * 100
        colnames.append(colname)
    return (df, colnames)
# Normalise both metrics with a single call: data_normal writes the '*_nor'
# columns into q1data in place and returns that same frame plus the new names.
resuldata, resulcolnames = data_normal(q1data, '_id', '知友密度')
# resulcolnames == ['_id_nor', '知友密度_nor']

# Top 20 cities by normalised count (sl) and by normalised density (md).
q1data_20_sl = resuldata.sort_values(resulcolnames[0], ascending=False)[['city', resulcolnames[0]]].iloc[:20]
q1data_20_md = resuldata.sort_values(resulcolnames[1], ascending=False)[['city', resulcolnames[1]]].iloc[:20]

fig1 = plt.figure(num=1, figsize=(15, 8))
y1 = q1data_20_sl[resulcolnames[0]]

# Fewer than 20 cities may survive the merge, so size the chart from the data
# instead of assuming exactly 20 bars (a length mismatch makes plt.bar fail).
bar_count = len(y1)

# Diagnostic output kept from the original script: the second-highest score.
if bar_count > 1:
    print(y1.iloc[1])

plt.bar(
    range(bar_count),  # numeric x positions, reused below to place the labels
    y1,
    width=0.8,
    edgecolor='k',
    facecolor='yellowgreen',
    tick_label=q1data_20_sl['city'],  # show the city names on the x axis
)
plt.title('知友数量TOP20')
plt.grid(True, linestyle='--', color='gray', linewidth=0.5, axis='y')

# Print each bar's value just above its top edge.
for i, j in enumerate(y1):
    plt.text(i - 0.3, j + 1, '%.1f' % j, color='k', fontsize=9)
plt.show()