爬虫+pyecharts分析前程无忧(51job)招聘职位数量
关于pyecharts的安装与使用可参考:http://pyecharts.org/#/zh-cn/intro
代码如下
from pyecharts.charts import Bar
from pyecharts import options as opts
import requests
from urllib import parse
import urllib
import re
def oneparse(oneurl):
    """Fetch the 51job search page for the first keyword and record its job count.

    Args:
        oneurl: Search keyword, already percent-encoded by the caller; it is
            substituted into the search URL template.

    Side effects:
        Prints the extracted count and appends it (as a string) to the
        module-level ``SL_list``, which must exist before this is called.

    Raises:
        requests.RequestException: On timeout, connection failure, or a
            non-success HTTP status.
        ValueError: If the page contains no "共…条职位" total.
    """
    # NOTE(review): the "030200%252C040000" segment is presumably the area
    # filter (the chart title mentions Guangzhou + Shenzhen) -- confirm.
    oneurls = 'https://search.51job.com/list/030200%252C040000,000000,0000,00,9,99,{},2,1.html?'
    # requests applies no timeout by default, so bound the wait explicitly
    # instead of blocking forever on a stalled connection.
    response = requests.get(oneurls.format(oneurl), timeout=10)
    # Surface HTTP errors directly rather than failing later on a page
    # that cannot contain the total.
    response.raise_for_status()
    # The page is decoded as GBK; a stray undecodable byte should not abort
    # the whole run, so it is replaced instead of raising.
    html = response.content.decode("gbk", errors="replace")
    match = re.search('共(.*?)条职位', html)
    if match is None:
        raise ValueError(
            "job total not found in 51job response for keyword %r" % (oneurl,)
        )
    oneSL = match.group(1)
    print(oneSL)
    SL_list.append(oneSL)
def twoparse(twourl):
    """Fetch the 51job search page for the second keyword and record its job count.

    Args:
        twourl: Search keyword, already percent-encoded by the caller; it is
            substituted into the search URL template.

    Side effects:
        Prints the extracted count and appends it (as a string) to the
        module-level ``SL_list``, which must exist before this is called.

    Raises:
        requests.RequestException: On timeout, connection failure, or a
            non-success HTTP status.
        ValueError: If the page contains no "共…条职位" total.
    """
    # NOTE(review): the "030200%252C040000" segment is presumably the area
    # filter (the chart title mentions Guangzhou + Shenzhen) -- confirm.
    twourls = 'https://search.51job.com/list/030200%252C040000,000000,0000,00,9,99,{},2,1.html?'
    # requests applies no timeout by default, so bound the wait explicitly
    # instead of blocking forever on a stalled connection.
    response = requests.get(twourls.format(twourl), timeout=10)
    # Surface HTTP errors directly rather than failing later on a page
    # that cannot contain the total.
    response.raise_for_status()
    # The page is decoded as GBK; a stray undecodable byte should not abort
    # the whole run, so it is replaced instead of raising.
    html = response.content.decode("gbk", errors="replace")
    match = re.search('共(.*?)条职位', html)
    if match is None:
        raise ValueError(
            "job total not found in 51job response for keyword %r" % (twourl,)
        )
    twoSL = match.group(1)
    print(twoSL)
    SL_list.append(twoSL)
def threeparse(threeurl):
    """Fetch the 51job search page for the third keyword and record its job count.

    Args:
        threeurl: Search keyword, already percent-encoded by the caller; it
            is substituted into the search URL template.

    Side effects:
        Prints the extracted count and appends it (as a string) to the
        module-level ``SL_list``, which must exist before this is called.

    Raises:
        requests.RequestException: On timeout, connection failure, or a
            non-success HTTP status.
        ValueError: If the page contains no "共…条职位" total.
    """
    # NOTE(review): the "030200%252C040000" segment is presumably the area
    # filter (the chart title mentions Guangzhou + Shenzhen) -- confirm.
    threeurls = 'https://search.51job.com/list/030200%252C040000,000000,0000,00,9,99,{},2,1.html?'
    # requests applies no timeout by default, so bound the wait explicitly
    # instead of blocking forever on a stalled connection.
    response = requests.get(threeurls.format(threeurl), timeout=10)
    # Surface HTTP errors directly rather than failing later on a page
    # that cannot contain the total.
    response.raise_for_status()
    # The page is decoded as GBK; a stray undecodable byte should not abort
    # the whole run, so it is replaced instead of raising.
    html = response.content.decode("gbk", errors="replace")
    match = re.search('共(.*?)条职位', html)
    if match is None:
        raise ValueError(
            "job total not found in 51job response for keyword %r" % (threeurl,)
        )
    threeSL = match.group(1)
    print(threeSL)
    SL_list.append(threeSL)
def fourparse(foururl):
    """Fetch the 51job search page for the fourth keyword and record its job count.

    Args:
        foururl: Search keyword, already percent-encoded by the caller; it
            is substituted into the search URL template.

    Side effects:
        Prints the extracted count and appends it (as a string) to the
        module-level ``SL_list``, which must exist before this is called.

    Raises:
        requests.RequestException: On timeout, connection failure, or a
            non-success HTTP status.
        ValueError: If the page contains no "共…条职位" total.
    """
    # NOTE(review): the "030200%252C040000" segment is presumably the area
    # filter (the chart title mentions Guangzhou + Shenzhen) -- confirm.
    foururls = 'https://search.51job.com/list/030200%252C040000,000000,0000,00,9,99,{},2,1.html?'
    # requests applies no timeout by default, so bound the wait explicitly
    # instead of blocking forever on a stalled connection.
    response = requests.get(foururls.format(foururl), timeout=10)
    # Surface HTTP errors directly rather than failing later on a page
    # that cannot contain the total.
    response.raise_for_status()
    # The page is decoded as GBK; a stray undecodable byte should not abort
    # the whole run, so it is replaced instead of raising.
    html = response.content.decode("gbk", errors="replace")
    match = re.search('共(.*?)条职位', html)
    if match is None:
        raise ValueError(
            "job total not found in 51job response for keyword %r" % (foururl,)
        )
    fourSL = match.group(1)
    print(fourSL)
    SL_list.append(fourSL)
if __name__ == '__main__':
    # Shared accumulator: every *parse function appends its job count here,
    # so it has to stay a module-level name.
    SL_list = []

    # Read all four search keywords up front (one per line, no prompt),
    # before any network request is made.
    name = [str(input()) for _ in range(4)]

    # Percent-encode each keyword and pass it to its matching fetcher, in
    # input order, so SL_list lines up with the x-axis labels.
    fetchers = (oneparse, twoparse, threeparse, fourparse)
    for fetch, keyword in zip(fetchers, name):
        fetch(urllib.parse.quote(keyword))

    # Build the bar chart: keywords on the x axis, collected counts on the
    # y axis.
    bar = (
        Bar()
        .add_xaxis(name)
        .add_yaxis("前途无忧", SL_list)
        .set_global_opts(title_opts=opts.TitleOpts(title="广州+深圳的职位数量"))
    )
    # render() is called without a path, so pyecharts picks the output file.
    bar.render()
运行结果