python爬虫实战——前程无忧(requests+xpath+csv)

源码奉上

import requests
from lxml import etree
import csv
import pymysql
#获取请求
def get_response(url, timeout=10):
    """Download a page and return its body decoded as GBK text.

    Args:
        url: URL of the page to fetch.
        timeout: Seconds to wait for the server before giving up. Without
            an explicit timeout ``requests`` waits indefinitely, so one
            stalled connection would hang the whole scraper.

    Returns:
        The response body as ``str``, decoded with the ``gbk`` codec.

    Raises:
        requests.RequestException: If the connection fails, the request
            times out, or the server answers with a 4xx/5xx status.
    """
    # A desktop-browser User-Agent is sent instead of the requests default.
    headers = {'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/70.0.3534.4 Safari/537.36'}
    response = requests.get(url, headers=headers, timeout=timeout)
    # Fail loudly on HTTP errors rather than handing an error page to the parser.
    response.raise_for_status()
    # Force GBK decoding instead of relying on the encoding requests infers.
    response.encoding = 'gbk'
    return response.text

#分析源码,获取信息所属块状位置
def _first_text(node, query):
    """Return the first result of XPath *query* on *node*, or '' if none."""
    matches = node.xpath(query)
    if not matches:
        return ''
    # Convert to a plain str and drop surrounding whitespace from the markup.
    return str(matches[0]).strip()


def get_html(html):
    """Extract job postings from the HTML of a search-result page.

    Args:
        html: Page source as a string.

    Returns:
        A list of dicts, one per posting, with the keys ``job_name``,
        ``company_name``, ``work_space`` and ``work_pay``. A field whose
        XPath query matches nothing is stored as an empty string.
    """
    tree = etree.HTML(html)
    # The first four matching <div class="el"> elements are skipped.
    # NOTE(review): presumably these are non-listing rows such as the table
    # header -- confirm against the current page markup.
    rows = tree.xpath('//div[@class="el"]')[4:]
    info = []
    for row in rows:
        # Values are read directly from the XPath result. Going through
        # str(list).strip("[']") would eat legitimate leading/trailing
        # brackets and quotes and turn whitespace into literal escapes.
        key = {
            # The trailing space in "t1 " is part of the queried class value.
            'job_name': _first_text(row, './/p[@class="t1 "]//a/@title'),
            'company_name': _first_text(row, './/span[@class="t2"]/a/text()'),
            'work_space': _first_text(row, './/span[@class="t3"]/text()'),
            'work_pay': _first_text(row, './/span[@class="t4"]/text()'),
        }
        info.append(key)
        print(key)
    return info

#保存数据至CSV文件中
def save_data(info, filename='qianchengwuyou.csv'):
    """Append job records to a CSV file, writing the header row only once.

    Args:
        info: Iterable of dicts with the keys ``job_name``,
            ``company_name``, ``work_space`` and ``work_pay``.
        filename: Path of the CSV file to append to. It is created if it
            does not exist.

    Raises:
        KeyError: If a record lacks one of the four expected keys.
        OSError: If the file cannot be opened or written.
    """
    import os  # local import: only needed for the SEEK_END constant

    headers = ['职位名称', '公司名称', '工作地点', '薪资']
    with open(filename, 'a+', encoding='UTF-8', newline='') as fp:
        # This function is called once per scraped page. Only emit the
        # header when the file is still empty, otherwise it would be
        # repeated in the middle of the data on every call.
        fp.seek(0, os.SEEK_END)
        is_empty = fp.tell() == 0
        writer = csv.writer(fp)
        if is_empty:
            writer.writerow(headers)
        writer.writerows(
            [key['job_name'], key['company_name'], key['work_space'], key['work_pay']]
            for key in info
        )


if __name__ == '__main__':
    # Search-result URL with one `{}` placeholder for the page number.
    # The keyword segment is the double-URL-encoded form of "大数据".
    page_url = 'https://search.51job.com/list/120200,000000,0000,00,9,99,%25E5%25A4%25A7%25E6%2595%25B0%25E6%258D%25AE,2,{}.html?lang=c&stype=1&postchannel=0000&workyear=99&cotype=99&degreefrom=99&jobterm=99&companysize=99&lonlat=0%2C0&radius=-1&ord_field=0&confirmdate=9&fromType=&dibiaoid=0&address=&line=&specialarea=00&from=&welfare=%EF%BC%89'
    # Pages 1 through 8 are processed in order: download, parse, then save.
    for page_number in range(1, 9):
        page_source = get_response(page_url.format(page_number))
        postings = get_html(page_source)
        save_data(postings)

爬取结果
（爬取结果截图：原文图片未能保留，内容为生成的 qianchengwuyou.csv 文件，包含职位名称、公司名称、工作地点、薪资四列。）

  • 2
    点赞
  • 14
    收藏
    觉得还不错? 一键收藏
  • 1
    评论
评论 1
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值