创建项目:
scrapy startproject scrapyProject
在项目下创建爬虫(需先进入项目目录: cd scrapyProject):
scrapy genspider s_tencent careers.tencent.com
目录
1.spiders文件夹下的s_tencent.py
# -*- coding: utf-8 -*-
import scrapy
import json
from scrapyProject.items import ScrapyprojectItem
class STencentSpider(scrapy.Spider):
# Spider identifier; Scrapy uses it to select the spider (`scrapy crawl s_tencent`).
name = 's_tencent'
# Hosts this spider is expected to request.
allowed_domains = ['careers.tencent.com']
# One search-API URL per result page: pageIndex runs from 1 to 61 inclusive,
# each request asking for pageSize=10 results matching keyword=python.
# Built with a comprehension so no loop variables (`page`, `url`) are left
# behind in the defining namespace.
start_urls = [
    'https://careers.tencent.com/tencentcareer/api/post/Query?keyword=python&pageIndex=%s&pageSize=10' % page
    for page in range(1, 62)
]
def parse(self, response):
# 读response的页面信息
content = response.body.decode('utf-8')
# json