![](https://img-blog.csdnimg.cn/20201014180756754.png?x-oss-process=image/resize,m_fixed,h_64,w_64)
scrapy
6点就起床
一个人
展开
-
基于scrapy框架爬取新浪体育部分板块内容
import scrapy
from selenium import webdriver
from sohuPro.items import SohuproItem

class SohuSpider(scrapy.Spider):
    name = 'sohu'
    # allowed_domains = ['www.xxx.com']
    start_urls = ['http://sports.sina.com.cn/']
    #需求:爬取新浪体育欧冠,西甲,意甲,德甲等5大板块中的新闻
原创 2020-12-08 21:35:23 · 383 阅读 · 1 评论 -
Scrapy模块爬取中华英才网招聘信息(分页)
import scrapy
from fenye.items import FenyeItem
import requests

class ZhfySpider(scrapy.Spider):
    name = 'zhfy'
    # allowed_domains = ['www.xxx.com']
    start_urls = ['http://www.chinahr.com/channel/rizhao/pn1/']
    # 分页操作
    urls = 'http://www.chi
原创 2020-12-06 16:03:56 · 389 阅读 · 2 评论 -
scrapy框架基于管道的持久化存储,将数据存储到Mysql数据库
import scrapy
from kjpro2.items import Kjpro2Item

class A58tcSpider(scrapy.Spider):
    name = '58tc'
    # allowed_domains = ['www.xxx.com']
    start_urls = ['http://www.chinahr.com/channel/rizhao/pn2/']

    def parse(self, response):
        li_list=res
原创 2020-12-05 21:49:37 · 318 阅读 · 0 评论 -
Scrapy模块爬取中华英才网招聘信息(未分页)
import scrapy
import time
from kjPro.items import KjproItem

class ZhhSpider(scrapy.Spider):
    name = 'zhh'
    # allowed_domains = ['www.xxx.com']
    start_urls = ['http://www.chinahr.com/channel/rizhao/']

    def parse(self, response):
        li_list=
原创 2020-12-04 22:37:20 · 350 阅读 · 0 评论