import requests
from lxml import etree
import re
import json
import csv
import time
# Shared HTTP headers for every request: a desktop Chrome UA plus a logged-in
# zhaopin.com session cookie captured from the browser (the API endpoints
# refuse anonymous requests).  NOTE(review): the cookie is session-bound and
# will expire; refresh it from a live browser session before running.
header = {
    'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*',
    'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/90.0.4430.212 Safari/537.36',
    # The original paste broke this single cookie string across two physical
    # lines (a SyntaxError); rejoined here via implicit adjacent-literal
    # concatenation — content is unchanged.
    "cookie": (
        "urlfrom=121113803; urlfrom2=121113803; adfbid=0; adfbid2=0; x-zp-client-id=8b91112b-10f9-4f3a-9a79-4bd8a65d2cda; sts_deviceid=17aaa2c6e37e5-00a2a5f9c93e51-3d385d08-2073600-17aaa2c6e38e6; sts_sg=1; sts_sid=17aaa2c6e3e2e-01c204e0824154-3d385d08-2073600-17aaa2c6e3f67; sts_chnlsid=121113803; zp_src_url=https%3A%2F%2Fwww.baidu.com%2Fother.php%3Fsc.Kf00000TzIOILY9n8fGUSPnvCjXz91Lzvs6CTU8a9qXQHnXh304oKtchuMdbk0h58txcjCiA43eZogbpDqhsejLaZ4NjTKUk0RIjnVr23jt4iPIloCHYeEkgElqClG0pz9t_jJkpHlyDWNfUn7r2vu-irLpSqAISdpA6QjJ8N3lhSgiycfJs2ZUZcIX1wueINkg6CT08BwndmxiosuLmVe7Zzgos.7Y_NR2Ar5Od669BCXgjRzeASFDZtwhUVHf632MRRt_Q_DNKnLeMX5Dkgbooo3eQr5gKPwmJCRnTxOoKKsTZK4TPHQ_U3bIt7jHzk8sHfGmEukmnTr59l32AM-YG8x6Y_f3lZgKfYt_QCJamJjArZZsqT7jHzs_lTUQqRHArZ5Xq-dKl-muCyrMWYv0.TLFWgv-b5HDkrfK1ThPGujYknHb0THY0IAYqd_xKJ6KdTvNzgLw4TARqn0K9u7qYXgK-5Hn0IvqzujLyktAJ0ZFWIWYk0ZNzU7qGujYkPHcLnHcsPjbz0Addgv-b5HDYnW6LnH630AdxpyfqnH04PHDYPjb0UgwsU7qGujYknWcLnsKsI-qGujYs0A-bm1dcHbc0TA-b5HDv0APGujY1P1D0mLFW5Hf1P1Ts%26ck%3D5303.1.84.394.155.394.155.87%26dt%3D1626352621%26wd%3D%25E6%2599%25BA%25E8%2581%2594%26tpl%3Dtpl_12273_25457_20875%26l%3D1527120492%26us%3DlinkName%253D%2525E6%2525A0%252587%2525E9%2525A2%252598-%2525E4%2525B8%2525BB%2525E6%2525A0%252587%2525E9%2525A2%252598%2526linkText%253D%2525E3%252580%252590%2525E6%252599%2525BA%2525E8%252581%252594%2525E6%25258B%25259B%2525E8%252581%252598%2525E3%252580%252591%2525E5%2525AE%252598%2525E6%252596%2525B9%2525E7%2525BD%252591%2525E7%2525AB%252599%252520%2525E2%252580%252593%252520%2525E5%2525A5%2525BD%2525E5%2525B7%2525A5%2525E4%2525BD%25259C%2525EF%2525BC%25258C%2525E4%2525B8%25258A%2525E6%252599%2525BA%2525E8%252581%252594%2525E6%25258B%25259B%2525E8%252581%252598%2525EF%2525BC%252581%2526linkType%253D; sajssdk_2015_cross_new_user=1; at=7a097eef75d4468993fb77354277e4d6; rt=8232518f51c14b1b9b791b0a0429e77b; acw_tc=276082a816263529833152876e9fce81f07c590bb3f5b105448f1519351320; ZP-ENV-FLAG=gray; sts_evtseq=3; ZP_OLD_FLAG=false; "
        "sensorsdata2015jssdkcross=%7B%22distinct_id%22%3A%221103913743%22%2C%22first_id%22%3A%2217aaa2c70f79d-0e47f11c840521-3d385d08-2073600-17aaa2c70f89ba%22%2C%22props%22%3A%7B%22%24latest_traffic_source_type%22%3A%22%E4%BB%98%E8%B4%B9%E5%B9%BF%E5%91%8A%E6%B5%81%E9%87%8F%22%2C%22%24latest_search_keyword%22%3A%22%E6%9C%AA%E5%8F%96%E5%88%B0%E5%80%BC_%E7%9B%B4%E6%8E%A5%E6%89%93%E5%BC%80%22%2C%22%24latest_referrer%22%3A%22%22%2C%22%24latest_utm_source%22%3A%22baidupcpz%22%2C%22%24latest_utm_medium%22%3A%22cpt%22%7D%2C%22%24device_id%22%3A%2217aaa2c70f79d-0e47f11c840521-3d385d08-2073600-17aaa2c70f89ba%22%7D; _uab_collina=162635303430102616818484; Hm_lvt_38ba284938d5eddca645bb5e02a02006=1626353034; Hm_lpvt_38ba284938d5eddca645bb5e02a02006=1626353034; LastCity=%E6%AD%A6%E6%B1%89; LastCity%5Fid=736"
    ),
}
def get_context(number):
    """Crawl one job posting and append a row to 智联招聘_大数据.csv.

    Two requests are made per posting:
      1. the ``similar-positions`` JSON API, for the structured fields
         (company, salary, city, education, ...);
      2. the posting's HTML page, for the free-text job description.

    NOTE(review): the API returns a *list*; this loop overwrites the fields
    on every iteration, so only the LAST entry is kept — preserved as-is.
    ``name`` also appears twice in the CSV row; preserved to keep the
    existing CSV column layout.

    :param number: zhaopin job number (string), used in both URLs.
    """
    api_url = "https://fe-api.zhaopin.com/c/i/similar-positions?number=" + number
    page_url = 'https://jobs.zhaopin.com/' + number + '.htm'
    resp = requests.get(url=api_url, headers=header)

    # Default every field to "" so the CSV row stays well-formed even when
    # the API response is missing or malformed.
    (companyName, companyNumber, companySize, salary60, workCity, education,
     workingExp, company_property, companyUrl, positionURL, name, number,
     cityId, cityDistrict, applyType, score, tag) = ("",) * 17
    try:
        for i in resp.json()['data']['data']['list']:
            companyName = i['companyName']          # company name
            companyNumber = i['companyNumber']      # company ID
            companySize = i['companySize']          # company size
            salary60 = i['salary60']                # salary
            workCity = i['workCity']                # city
            education = i['education']              # education requirement
            workingExp = i['workingExp']            # work experience
            company_property = i['property']        # company ownership type
            companyUrl = i['companyUrl']            # company homepage
            positionURL = i['positionURL']          # job posting URL
            name = i['name']                        # job title
            number = i['number']                    # job number
            cityId = i['cityId']                    # city id
            cityDistrict = i['cityDistrict']        # city district
            applyType = i['applyType']              # application type
            score = i['score']                      # company score
            # Flatten the welfare labels into a "/"-separated string.
            tag = "/".join(j['value'] for j in i['welfareLabel'])
    except (KeyError, TypeError, ValueError):
        # Best-effort: an unexpected API payload leaves the defaults in place
        # instead of aborting the whole crawl.
        pass

    # Second request: scrape the job description text from the detail page.
    resp = requests.get(url=page_url, headers=header)
    html_xpath = etree.HTML(resp.text)
    # string(...) concatenates all text under the description node.
    # etree.HTML returns None for an empty document — guard against that.
    miaosu = ''
    if html_xpath is not None:
        miaosu = str(html_xpath.xpath('string(//*[@class="describtion__detail-content"])'))
    print("----------------------" + miaosu)

    # ``with`` guarantees the file is closed even if writerow raises.
    # NOTE(review): encoding is left at the platform default to match the
    # original output files — confirm whether utf-8 is wanted.
    with open('智联招聘_大数据.csv', 'a', newline='') as fp:
        csv.writer(fp).writerow((
            companyName, name, tag, companyNumber, companySize, salary60,
            workCity, education, workingExp, company_property, companyUrl,
            positionURL, name, number, cityId, cityDistrict, applyType,
            score, miaosu,
        ))
        print('正在写入----' + workCity + '----的职位数据' + '----------' + name)
# Web前端
def get_url(city):
    """Search zhaopin for '大数据' jobs in one city and crawl every hit.

    Queries the search API for the given city code, then hands each result's
    job number to :func:`get_context`, which writes the detail row to CSV.

    :param city: city/province code as a string (interpolated into the URL).
    """
    key = '大数据'  # search keyword
    url = 'https://fe-api.zhaopin.com/c/i/sou?pageSize=4000&cityId='+city+'&workExperience=-1&education=-1&companyType=-1&employmentType=-1&jobWelfareTag=-1' \
          '&kw='+key+'&kt=3&lastUrlQuery=%7B%22pageSize%22:%2260%22,%22jl%22:%22489%22,%22kw%22:%22%E5%A4%A7%E6%95%B0%E6%8D%AE%22,%22kt%22:%223%22%7D'
    resp = requests.get(url=url, headers=header)
    try:
        for i in resp.json()['data']['results']:
            print("-----------" + i['number'])
            get_context(i['number'])  # crawl the detail page for this hit
    except (KeyError, TypeError, ValueError):
        # Best-effort: skip this city if the API payload is not as expected
        # instead of aborting the whole crawl.
        pass
# Entry point: fetch one search page, scrape the province list out of the
# embedded __INITIAL_STATE__ JSON blob, then crawl every province with
# get_url().  Guarded so importing this module no longer fires network
# requests as a side effect.
if __name__ == '__main__':
    url = 'https://sou.zhaopin.com/?jl=852&sf=0&st=0&kw=%E6%95%B0%E6%8D%AE%E5%88%86%E6%9E%90%E5%B8%88&kt=3'
    html = requests.get(url=url, headers=header).text
    # The province dictionary is serialized into an inline <script> tag.
    # NOTE(review): data[0] raises IndexError when the page layout changes
    # (no match) — preserved from the original.
    data = re.findall('<script>__INITIAL_STATE__=(.*?)</script>', html)
    datas = json.loads(data[0])
    try:
        for province in datas["basic"]["dict"]["location"]["province"]:
            get_url(province["code"])
    except (KeyError, TypeError):
        # Best-effort: an unexpected JSON shape ends the crawl quietly.
        pass
# Source attribution (copy-paste residue from the CSDN blog footer, turned
# into a comment so the file parses): author "zhaopin", latest recommended
# article published 2021-07-17 13:45:03.