import sys
reload(sys)
sys.setdefaultencoding("utf-8")
import urllib2
import urllib
import re
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
# Base address of the loan listing; the page number is appended to it for
# every request and the base itself is never modified.
# NOTE(review): the page number is placed after '#', i.e. in the URL fragment,
# which is normally not transmitted to the server -- confirm that the
# responses really differ from page to page.
url='https://www.we.com/loan#page-'

# One flat list per scraped field, accumulated over all pages.
title=[]
amount=[]
interest=[]
months=[]

# Patterns are compiled once, outside the loop. Each one captures the raw text
# between '"<field>":' and the next comma, so quoted values keep their quotes.
field_patterns = (
    (title, re.compile(r'"title":(.*?),')),        # extract the title field
    (amount, re.compile(r'"amount":(.*?),')),
    (interest, re.compile(r'"interest":(.*?),')),
    (months, re.compile(r'"months":(.*?),')),
)

for i in range(1,52):
    # Build the address from the unchanged base on every iteration. Appending
    # to `url` itself would grow it cumulatively (page-1, page-12, page-123, ...)
    # and request pages that were never intended.
    page_url = url + str(i)
    response = urllib2.urlopen(page_url)
    try:
        html = response.read().decode('utf-8')
    finally:
        # Release the connection even if reading or decoding fails.
        response.close()
    for values, pattern in field_patterns:
        values.extend(pattern.findall(html))
# Assemble the scraped columns into one table and convert the numeric fields
# from their scraped text form to float64.
numeric_columns = ['amount','interest','months']
rrd = pd.DataFrame({
    'title': title,
    'amount': amount,
    'interest': interest,
    'months': months,
})
rrd[numeric_columns] = rrd[numeric_columns].astype(np.float64)

# Per-title aggregates of the loan amount: number of loans and total amount.
amount_by_title = rrd.groupby('title')['amount']
title_count = amount_by_title.count()
title_sum = amount_by_title.sum()
# Debug aid: print title_count and title_sum here to inspect the aggregates.
plt.rc('font', family='STXihei', size=10)

# Category labels for the y axis, one per bar.
# NOTE(review): assumes the data contains exactly these five titles and that
# groupby('title') yields them in this order -- confirm against the scraped
# values.
category_labels = ('扩大生产/经营','教育培训','日常生活消费','装修','资金周转')
a = np.arange(1, len(category_labels) + 1)

# Two panels sharing the y axis form the two-sided bar chart. Drawing the loan
# count and the negated amount sum on a single x axis does not work because
# the two quantities have unrelated scales; separate x axes give each its own.
fig, (ax_sum, ax_count) = plt.subplots(1, 2, sharey=True)
fig.subplots_adjust(wspace=0)  # let both panels meet at the centre line

# Left panel: total amount per title, bars growing leftwards from the centre.
ax_sum.barh(a, title_sum, color = 'b', align='center', alpha = .5)
ax_sum.invert_xaxis()

# Right panel: number of loans per title, bars growing rightwards.
ax_count.barh(a, title_count, color = 'r', align='center', alpha = .5)

# As in the original chart, no numeric labels are shown along the x axes.
ax_sum.tick_params(labelbottom=False)
ax_count.tick_params(labelbottom=False)

ax_sum.set_ylabel('贷款用途分类')
fig.suptitle('贷款用户金额及笔数')
ax_sum.set_yticks(a)
ax_sum.set_yticklabels(category_labels)
plt.show()
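# 爬取人人贷 (scraping loan listings from Renrendai, we.com)
# 最新推荐文章于 2021-10-29 14:57:18 发布 (blog-page footer: latest recommended article published 2021-10-29 14:57:18)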