qsbk.py 文件代码
# -*- coding: utf-8 -*-
import scrapy
from ..items import QsbkItem
class QsbkSpider(scrapy.Spider):
    """Crawl the text-joke listing pages of qiushibaike.com.

    Page 1 is fetched via ``start_urls`` and parsed by :meth:`parse`, which
    both extracts its entries and schedules pages 2-9; every page's entries
    are extracted by :meth:`parse_xq` and yielded as ``QsbkItem`` objects.
    """

    name = 'qsbk'
    allowed_domains = ['qiushibaike.com']
    start_urls = ['https://www.qiushibaike.com/text/page/1/']

    def parse(self, response):
        """Extract page 1 and schedule the remaining listing pages.

        Bug fix: the original only scheduled pages 2-9 and never extracted
        page 1, so its entries were silently dropped.
        """
        yield from self.parse_xq(response)
        for page in range(2, 10):
            next_url = 'https://www.qiushibaike.com/text/page/{}/'.format(page)
            yield scrapy.Request(next_url, callback=self.parse_xq)

    def parse_xq(self, response):
        """Yield one ``QsbkItem`` per joke entry on a listing page.

        NOTE(review): pairing the three flat lists by position assumes every
        entry on the page has all three fields — confirm against the live
        page markup.
        """
        contents = response.xpath('//div[@class="content"]/span/text()').extract()
        numbers = response.xpath('//span[@class="stats-vote"]/i/text()').extract()
        comments = response.xpath('//div[@class="stats"]/span[2]/a/i/text()').extract()
        # Bug fix: create a FRESH item per entry.  The original reused one
        # QsbkItem instance, so every yielded item shared (and overwrote) the
        # same underlying data.  zip() also stops at the shortest list, so a
        # mismatch can no longer raise IndexError like contents[i] could.
        for content, number, comment in zip(contents, numbers, comments):
            item = QsbkItem()
            item['content'] = content
            item['number'] = number + '好笑'
            item['comment'] = comment + '评论'
            yield item
items.py 文件代码
class QsbkItem(scrapy.Item):
    """One scraped joke: its text plus its like and comment counts."""

    content = scrapy.Field()  # joke body text
    number = scrapy.Field()   # like count, already suffixed with '好笑'
    comment = scrapy.Field()  # comment count, already suffixed with '评论'

    def get_insert_sql(self):
        """Return an ``(sql, params)`` pair for inserting this item into MySQL."""
        sql = 'insert into qsbk_test(content,number,comment)values (%s,%s,%s)'
        params = tuple(self[key] for key in ('content', 'number', 'comment'))
        return (sql, params)
pipelines.py 文件代码
class MysqlProjectPipeline(object):
    """Item pipeline that persists each scraped item into MySQL."""

    def process_item(self, item, spider):
        """Insert *item* into the database and pass it down the pipeline chain."""
        insert_sql, data = item.get_insert_sql()
        # NOTE(review): constructing a new MysqlHelper per item is wasteful —
        # consider creating it once in open_spider().  Left as-is because
        # MysqlHelper's lifecycle/connection handling is not visible here.
        myhelper = MysqlHelper()
        myhelper.execute_modify_sql(insert_sql, data)
        # Bug fix: a Scrapy pipeline must return the item (or raise DropItem);
        # returning None would starve every later pipeline of this item.
        return item
其他配置（settings.py 等）见《Scrapy 基本框架》一文中的相应位置。