爬取拉勾数据并保存至Excel

import requests,xlwt

# HTTP request headers sent with every call made by getJobList.
headers = {
    # Client identification string (browser User-Agent).
    'User-Agent':'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/62.0.3202.62 Safari/537.36',
    # Referer: the page this request claims to have been issued from.
    'Referer':'https://www.lagou.com/jobs/list_python?labelWords=&fromSearch=true&suginput=',
    # Hard-coded cookie string.
    # NOTE(review): looks like it was copied from a browser session and has
    # presumably expired — confirm and refresh before running.
    'Cookie':'user_trace_token=20171011164612-26182303-a722-4948-ac0f-4eea64769e4b; LGUID=20171011164613-a3656b0a-ae60-11e7-89e8-525400f775ce; index_location_city=%E5%85%A8%E5%9B%BD; JSESSIONID=ABAAABAAAFCAAEGF4FD126417B6EE0915FF3381A6C63B95; _gat=1; PRE_UTM=; PRE_HOST=; PRE_SITE=; PRE_LAND=https%3A%2F%2Fwww.lagou.com%2F; TG-TRACK-CODE=index_search; SEARCH_ID=06ebbf9e08074be9afb4ca7a25b5357b; _gid=GA1.2.188056782.1508746956; _ga=GA1.2.1248730130.1507711574; Hm_lvt_4233e74dff0ae5bd0a3d81c6ccf756e6=1507711574,1508746957,1508747184,1508761809; Hm_lpvt_4233e74dff0ae5bd0a3d81c6ccf756e6=1508761831; LGSID=20171023203002-e490a74c-b7ed-11e7-a584-525400f775ce; LGRID=20171023203025-f1d4a497-b7ed-11e7-960c-5254005c3644'
}

def getJobList(page, timeout=10):
    """Fetch one page of job postings for the keyword ``python``.

    Sends a form-encoded POST to the ``positionAjax.json`` endpoint using the
    module-level ``headers`` and returns the list stored under
    ``content -> positionResult -> result`` in the JSON reply.

    Args:
        page: Page number, sent as the ``pn`` form field.
        timeout: Seconds to wait for the server before giving up. Without a
            timeout ``requests`` may block indefinitely on a stalled
            connection.

    Returns:
        The value of ``result['content']['positionResult']['result']``.

    Raises:
        requests.RequestException: On timeout, connection failure, or an
            HTTP error status.
        KeyError: If the JSON reply lacks the expected keys.
    """
    data = {
        'first': 'false',
        'pn': page,
        'kd': 'python',
    }
    # Issue the POST request.
    res = requests.post(
        'https://www.lagou.com/jobs/positionAjax.json?needAddtionalResult=false&isSchoolJob=0',
        data=data,
        headers=headers,
        timeout=timeout,
    )
    # Surface 4xx/5xx replies here instead of failing later while parsing
    # an error page as JSON.
    res.raise_for_status()
    result = res.json()
    return result['content']['positionResult']['result']


# Create the workbook and the single worksheet that receives the results.
excelBook = xlwt.Workbook()
sheet1 = excelBook.add_sheet('lagou',cell_overwrite_ok=True)

# One (header text, job-record key) pair per column, in left-to-right order.
# NOTE(review): the header '岗位类型' is filled from 'industryField' — confirm
# this pairing is intended.
columns = (
    ('岗位名称', 'positionName'),
    ('薪资范围', 'salary'),
    ('工作年限', 'workYear'),
    ('学历', 'education'),
    ('全职/兼职', 'jobNature'),
    ('城市', 'city'),
    ('公司简称', 'companyShortName'),
    ('区域', 'district'),
    ('公司人数', 'companySize'),
    ('岗位类型', 'industryField'),
    ('公司全称', 'companyFullName'),
)

# Row 0 is the header row.
for col, (title, _) in enumerate(columns):
    sheet1.write(0, col, title)

# Data rows start at row 1; pages 1 through 30 are fetched in order.
index = 1
for page in range(1, 31):
    print(page)
    for job in getJobList(page):
        for col, (_, key) in enumerate(columns):
            sheet1.write(index, col, job[key])
        index += 1
excelBook.save('lagou.xls')

 

转载于:https://my.oschina.net/friendship/blog/1555125

评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值