Python 3 Scrapy 爬取腾讯招聘,数据存为 JSON 格式

程序主要代码:(tencentPosition.py)

# -*- coding: utf-8 -*-
import scrapy
from tencent.items import TencentItem

class TencentpositionSpider(scrapy.Spider):
    """Spider that scrapes the job-listing table from hr.tencent.com.

    Each table row with class ``even`` or ``odd`` becomes one
    ``TencentItem``. After a page has been processed the spider advances
    ``offset`` by ``page_size`` and schedules the next listing page, until
    ``max_offset`` is reached or a page yields no rows.
    """

    name = "tencent"
    allowed_domains = ["tencent.com"]

    # Listing URL prefix; the numeric ``start`` offset is appended to it.
    url = "https://hr.tencent.com/position.php?&start="
    # Offset of the most recently scheduled listing page.
    offset = 0
    # Step between consecutive pages and the last offset to request.
    # NOTE(review): 10 and 1680 come from the previously commented-out
    # pagination code; presumably they match the site's page size and total
    # listing count at the time of writing -- confirm before relying on them.
    page_size = 10
    max_offset = 1680

    # Only the very first request is taken from here; every later page is
    # scheduled from parse().
    start_urls = [url + str(offset)]

    @staticmethod
    def _first(row, query):
        """Return the first string matched by ``query`` under ``row``, or None.

        Avoids the IndexError that ``extract()[0]`` raises on an empty cell,
        which would otherwise abort the whole callback (remaining rows and
        the follow-up page request included).
        """
        matches = row.xpath(query).extract()
        return matches[0] if matches else None

    def parse(self, response):
        """Yield one item per listing row, then a request for the next page.

        Args:
            response: the downloaded listing page.

        Yields:
            ``TencentItem`` objects, followed by at most one
            ``scrapy.Request`` for the next page.
        """
        rows = response.xpath("//tr[@class='even'] | //tr[@class='odd']")
        for each in rows:
            item = TencentItem()
            # Position name
            item['positionname'] = self._first(each, "./td[1]/a/text()")
            # Link to the detail page
            item['positionlink'] = self._first(each, "./td[1]/a/@href")
            # Position category
            item['positionType'] = self._first(each, "./td[2]/text()")
            # Number of openings
            item['peopleNum'] = self._first(each, "./td[3]/text()")
            # Work location
            item['workLocation'] = self._first(each, "./td[4]/text()")
            # Publish date
            item['publishTime'] = self._first(each, "./td[5]/text()")

            yield item

        # Advance to the next page. The offset must change between requests:
        # re-requesting the same URL never moves the crawl past this page.
        # Stop once the configured last offset has been requested, or as
        # soon as a page contains no listing rows.
        if rows and self.offset < self.max_offset:
            self.offset += self.page_size
            yield scrapy.Request(self.url + str(self.offset), callback=self.parse)


管道文件(pipelines.py)
# -*- coding: utf-8 -*-

# Define your item pipelines here
#
# Don't forget to add your pipeline to the ITEM_PIPELINES setting
# See: http://doc.scrapy.org/en/latest/topics/item-pipeline.html
import json

class TencentPipeline:
    """Item pipeline that writes each item to ``tencent.json``, one JSON object per line."""

    def __init__(self):
        # Despite its name, ``filename`` holds the open file object, not a
        # path. Mode "w" truncates any existing tencent.json.
        self.filename = open("tencent.json", mode="w", encoding="utf-8")

    def process_item(self, item, spider):
        """Serialize ``item`` as one line of JSON and pass it on unchanged.

        Args:
            item: mapping-like object; it is converted with ``dict(item)``.
            spider: unused by this pipeline.

        Returns:
            The same ``item`` object that was passed in.
        """
        record = dict(item)
        # ensure_ascii=False keeps non-ASCII text readable in the output
        # instead of emitting \uXXXX escapes.
        line = json.dumps(record, ensure_ascii=False) + "\n"
        self.filename.write(line)
        return item

    def close_spider(self, spider):
        """Close the output file."""
        self.filename.close()

 

 

 

评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值