# Scrape job postings from the Tencent careers site (爬取腾讯招聘网)
#coding=utf-8
import urllib.parse
import urllib.request
import json
import jsonpath
# Module-level accumulator: one dict per job posting collected so far.
joblist = []
def getdata(jsonstr):
    """Extract job postings from a JSON response and save them to jobList.json.

    Parses ``jsonstr``, pulls the posting fields out with JSONPath, appends
    one dict per posting to the module-level ``joblist``, prints the
    accumulated list and rewrites ``jobList.json`` with its JSON encoding.

    Args:
        jsonstr: JSON text to parse.

    Raises:
        json.JSONDecodeError: if ``jsonstr`` is not valid JSON.
        OSError: if ``jobList.json`` cannot be written.
    """
    jsonobj = json.loads(jsonstr)

    def extract(expr):
        # jsonpath.jsonpath() returns False (not an empty list) when the
        # expression matches nothing; normalise that to [] so a response
        # without postings is handled as "no rows" instead of a TypeError.
        return jsonpath.jsonpath(jsonobj, expr) or []

    titles = extract('$..RecruitPostName')         # job title
    addresses = extract('$..LocationName')         # work location
    contents = extract('$..Responsibility')        # job description
    updated = extract('$..LastUpdateTime')         # last update time
    experience = extract('$..RequireWorkYearsName')  # required experience
    links = extract('$..PostURL')                  # detail page URL

    # zip() pairs the columns positionally and stops at the shortest one,
    # so a missing column can no longer raise IndexError/TypeError.
    for title, address, content, time_, exe, url in zip(
            titles, addresses, contents, updated, experience, links):
        joblist.append({
            "职业": title,
            "地点": address,
            "工作内容": content,
            "发布时间": time_,
            "经验": exe,
            "详细链接": url,
        })

    print(joblist)

    # The JSON is written with ensure_ascii=False, so the file must be opened
    # with an explicit UTF-8 encoding rather than the platform default.
    # The with-block closes the file even if serialisation fails.
    with open('jobList.json', 'w', encoding='utf-8') as file:
        file.write(json.dumps(joblist, ensure_ascii=False))
def getjson(url):
    """Fetch ``url``, hand the decoded body to ``getdata`` and return it.

    Args:
        url: Address to request with a browser-like User-Agent header.

    Returns:
        The response body decoded as UTF-8 text.

    Raises:
        urllib.error.URLError: if the request fails.
        UnicodeDecodeError: if the body is not valid UTF-8.
    """
    header = {'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/119.0.0.0 Safari/537.36 Edg/119.0.0.0'}
    # Build the request with the custom header.
    request = urllib.request.Request(url, headers=header)
    # The context manager closes the HTTP response (and its socket) even if
    # reading or decoding raises.
    with urllib.request.urlopen(request) as response:
        jsonstr = response.read().decode('UTF-8')
    # Echo the raw body.
    print(jsonstr)
    # Extract and persist the postings contained in the body.
    getdata(jsonstr)
    return jsonstr
def myspider(begin, end, key):
    """Request every result page from ``begin`` to ``end`` (inclusive).

    Args:
        begin: First page index to fetch.
        end: Last page index to fetch.
        key: Search keyword; it is inserted into the query string verbatim,
            no escaping is applied here.
    """
    base = "https://careers.tencent.com/tencentcareer/api/post/Query?timestamp=1700185119559&countryId=&cityId=&bgIds=&productId=&categoryId=&parentCategoryId=&attrId=&keyword="
    tail = "&pageSize=10&language=zh-cn&area=cn"
    for page_index in range(begin, end + 1):
        # One query URL per page; getjson fetches it and processes the result.
        getjson("".join((base, key, "&pageIndex=", str(page_index), tail)))
if __name__ == '__main__':
    # Ask for the search keyword and the inclusive page range.
    raw_keyword = input("请输入:")
    first_page = int(input("请输入开始页码:"))
    last_page = int(input("请输入结束页码:"))
    # Percent-encode the keyword so non-ASCII text is safe inside a URL.
    encoded_keyword = urllib.parse.quote(raw_keyword)
    print(encoded_keyword)
    myspider(first_page, last_page, encoded_keyword)