目标网页:网易招聘
抓取目标:
1.根据输入的职业关键词,抓取该关键词下的全部招聘职位数据
2.存入excel
用到的库:
import requests
import pandas
全部代码:
import requests
import pandas
# Ask the user for the job keyword to search for; the rest of the script
# uses it both as the query keyword and in the output file name.
key = input('输入你要搜索的职业:')
def size(key, timeout=10):
    """Return the total number of postings that match *key*.

    Sends a one-item query (``pageSize`` 1) to the NetEase recruitment search
    endpoint and reads the ``total`` counter from the JSON response, so the
    caller can then request every match in a single page.

    Args:
        key: Search keyword, sent as the ``keyword`` field of the query.
        timeout: Seconds to wait for the server before giving up.

    Returns:
        The value stored under ``data.total`` in the response.

    Raises:
        requests.HTTPError: If the server answers with an error status.
        requests.RequestException: On connection failures or timeouts.
    """
    query = {"currentPage": 1, "pageSize": 1, "keyword": key}
    url = 'https://hr.163.com/api/hr163/position/queryPage'
    # Without a timeout a stalled connection would block the script forever.
    res = requests.post(url, json=query, timeout=timeout)
    # Fail loudly on HTTP errors instead of trying to parse an error page.
    res.raise_for_status()
    return res.json()['data']['total']
# Download every posting that matches `key` and export them to an Excel file.
# size(key) reports how many postings match; using that count as pageSize
# asks the API for all of them in a single page.
page_size = size(key)
data = {"currentPage": 1, "pageSize": page_size, "keyword": key}
url = 'https://hr.163.com/api/hr163/position/queryPage'
# A timeout keeps the script from hanging on a stalled connection, and
# raise_for_status() surfaces HTTP errors before the body is parsed.
res = requests.post(url, json=data, timeout=10)
res.raise_for_status()
payload = res.json()
total = payload['data']['total']
# Treat a missing or null result list as "no postings" instead of crashing.
json_list = payload['data'].get('list') or []
print(total)

# Column headers of the exported sheet, in output order.
columns = [
    '职位名称',
    '工作地址',
    '工作部门',
    '职业类型',
    '需求人数',
    '学历要求',
    '经验需求',
    '职位描述',
    '职业要求',
]

wangyi_list = []
for job in json_list:
    name = job['name']  # position title
    print(name)
    # Only the first listed work place is exported; a posting with an empty
    # or missing list gets an empty cell instead of raising IndexError.
    places = job.get('workPlaceNameList') or []
    address = places[0] if places else ''
    # NOTE(review): 'postStatus' is exported under the headcount column
    # ('需求人数'), and 'requirement' / 'en_requirement' under the description
    # and requirement columns -- confirm these API fields really hold that.
    wangyi_list.append(
        {
            '职位名称': name,
            '工作地址': address,
            '工作部门': job['firstDepName'],
            '职业类型': job['firstPostTypeName'],
            '需求人数': job['postStatus'],
            '学历要求': job['reqEducationName'],
            '经验需求': job['reqWorkYearsName'],
            '职位描述': job['requirement'],
            '职业要求': job['en_requirement'],
        }
    )

# Passing the columns explicitly keeps the header row even when nothing matched.
ex = pandas.DataFrame(wangyi_list, columns=columns)
# Keywords such as "C/C++" contain characters that are not valid in file
# names; replace them so the export does not fail on the path.
safe_key = ''.join('_' if ch in '\\/:*?"<>|' else ch for ch in key)
ex.to_excel("网易招聘-%s.xlsx" % safe_key, index=False)
效果: