import requests,csv,time
def save_data(row):
    """Append a single row to the scraper's CSV output file.

    Parameters:
        row: iterable of cell values, written as one CSV line.
    """
    # 'a' append mode so successive calls accumulate rows; newline='' is
    # required by the csv module to avoid blank lines on Windows.
    # NOTE(review): GBK with errors='ignore' silently drops characters GBK
    # cannot encode — kept as-is for compatibility with the existing file.
    # Context manager guarantees the handle is closed even if writerow raises
    # (the original left the file open on error).
    with open('大街网职位爬虫.csv', 'a', encoding='GBK', newline='', errors='ignore') as f:
        csv.writer(f).writerow(row)
def ParserResponse(response):
    """Parse one page of the job-search JSON response and persist each job.

    Parameters:
        response: decoded JSON dict; the jobs live under
            response['data']['list'].

    Side effects:
        Appends one CSV row per job via save_data() and prints the row.
    """
    # Iterate over however many jobs the page actually returned instead of
    # assuming a fixed 30 — the original raised IndexError on short pages
    # (e.g. the last page of results).
    for job in response['data']['list']:
        corpId = job['corpId']        # job posting ID
        jobName = job['jobName']      # position title
        compName = job['compName']    # company name
        pubCity = job['pubCity']      # city
        salary = job['salary']        # salary
        # Optional fields: use dict.get with the same fallback strings
        # instead of bare excepts that also hid unrelated errors.
        pubEdu = job.get('pubEdu', "无学历要求")            # education requirement
        pubEx = job.get('pubEx', "无经验要求")              # experience requirement
        industryName = job.get('industryName', '无行业分类')  # industry
        row = [corpId, pubCity, jobName, compName, salary, industryName, pubEx, pubEdu]
        save_data(row)
        print(row)
def main():
    """Crawl up to 23 result pages of Dongguan job listings and parse each."""
    firstUrl = 'https://so.dajie.com/job/search'
    # One session for the whole crawl so cookies set by the first GET are
    # reused on every AJAX request (the original rebuilt the session and
    # re-fetched firstUrl on every page).
    session = requests.session()
    # BUG FIX: the original built the headers as a *set* literal containing
    # only the UA string (no 'User-Agent' key) and never attached it, so the
    # default python-requests User-Agent was sent. Attach a browser UA once.
    session.headers['User-Agent'] = (
        'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_4) '
        'AppleWebKit/537.36 (KHTML, like Gecko) '
        'Chrome/78.0.3904.87 Safari/537.36'
    )
    session.get(firstUrl)  # prime session cookies before hitting the AJAX API
    session.headers['referer'] = firstUrl
    for page in range(23):
        # Dongguan listings (city=441900); page index is 0-based.
        url = 'https://so.dajie.com/job/ajax/search/filter?keyword=&order=0&city=441900&recruitType=&salary=&experience=&page={}&positionFunction=&_CSRFToken=&ajax=1'.format(page)
        # Nationwide variant kept for reference:
        # url = 'https://so.dajie.com/job/ajax/search/filter?keyword=&order=0&city=&recruitType=&salary=&experience=&page={}&positionFunction=&_CSRFToken=&ajax=1'.format(page)
        response = session.get(url).json()
        ParserResponse(response)
        # time.sleep(3)
        # break
# Script entry point: only start the crawl when run directly, not on import.
if __name__ == '__main__':
    main()
# [Scraper] dajie.com job-listing crawler
# (blog-post metadata from the original source: latest recommended article published 2022-05-20 15:56:43)