# -*- coding: utf-8 -*-
import scrapy
from ..items import BookItem
class ZhSpider(scrapy.Spider):
    """Spider that scrapes novel rows from a zongheng.com listing page.

    For every qualifying ``<li>`` under ``<ul class="main_con">`` it yields
    one ``BookItem`` carrying the novel's type, name, word count, author and
    update time.

    NOTE(review): this file appears to define ``ZhSpider`` more than once;
    the last definition is the one bound to the name at import time.
    """

    name = 'zh'
    allowed_domains = ['zongheng.com']
    start_urls = ['http://book.zongheng.com/quanben/c0/c0/b9/u0/p1/v9/s1/t0/ALL.html']

    @staticmethod
    def _strip_layout_chars(text):
        """Return *text* with all newlines, carriage returns and tabs removed."""
        for char in ('\n', '\r', '\t'):
            text = text.replace(char, '')
        return text

    def parse(self, response):
        """Yield a ``BookItem`` for each novel row in *response*.

        Args:
            response: The downloaded listing page.

        Yields:
            BookItem: With ``novel_type``, ``novel_name``, ``novel_number``,
            ``novel_author`` and ``update_time`` filled in. Fields missing
            from the page fall back to the defaults given below.
        """
        for li in response.xpath('//ul[@class="main_con"]/li'):
            # Only <li> tags without a (non-empty) class attribute hold novel
            # information; skip every other row.
            if li.xpath('@class').extract_first('') != '':
                continue

            item = BookItem()
            # Novel type
            item['novel_type'] = li.xpath(
                'span[@class="kind"]/a/text()').extract_first('未知')
            # Novel name
            item['novel_name'] = li.xpath(
                'span/a[@class="fs14"]/text()').extract_first('')
            # Word count, with layout characters removed
            item['novel_number'] = self._strip_layout_chars(
                li.xpath('span[@class="number"]/text()').extract_first(''))
            # Author
            item['novel_author'] = li.xpath(
                'span[@class="author"]/a/text()').extract_first('匿名')
            # Update time, with layout characters removed
            item['update_time'] = self._strip_layout_chars(
                li.xpath('span[@class="time"]/text()').extract_first('已完结'))
            yield item
import scrapy
from ..items import BookItem
class ZhSpider(scrapy.Spider):
    """Spider that scrapes novel rows from a zongheng.com listing page.

    For every qualifying ``<li>`` under ``<ul class="main_con">`` it yields
    one ``BookItem`` carrying the novel's type, name, word count, author and
    update time.

    NOTE(review): a class with this same name is also defined earlier in
    this file; this later definition rebinds ``ZhSpider``. Consider keeping
    only one of them.
    """

    name = 'zh'
    allowed_domains = ['zongheng.com']
    start_urls = ['http://book.zongheng.com/quanben/c0/c0/b9/u0/p1/v9/s1/t0/ALL.html']

    @staticmethod
    def _strip_layout_chars(text):
        """Return *text* with all newlines, carriage returns and tabs removed."""
        for char in ('\n', '\r', '\t'):
            text = text.replace(char, '')
        return text

    def parse(self, response):
        """Yield a ``BookItem`` for each novel row in *response*.

        Args:
            response: The downloaded listing page.

        Yields:
            BookItem: With ``novel_type``, ``novel_name``, ``novel_number``,
            ``novel_author`` and ``update_time`` filled in. Fields missing
            from the page fall back to the defaults given below.
        """
        for li in response.xpath('//ul[@class="main_con"]/li'):
            # Only <li> tags without a (non-empty) class attribute hold novel
            # information; skip every other row.
            if li.xpath('@class').extract_first('') != '':
                continue

            item = BookItem()
            # Novel type
            item['novel_type'] = li.xpath(
                'span[@class="kind"]/a/text()').extract_first('未知')
            # Novel name
            item['novel_name'] = li.xpath(
                'span/a[@class="fs14"]/text()').extract_first('')
            # Word count, with layout characters removed
            item['novel_number'] = self._strip_layout_chars(
                li.xpath('span[@class="number"]/text()').extract_first(''))
            # Author
            item['novel_author'] = li.xpath(
                'span[@class="author"]/a/text()').extract_first('匿名')
            # Update time, with layout characters removed
            item['update_time'] = self._strip_layout_chars(
                li.xpath('span[@class="time"]/text()').extract_first('已完结'))
            yield item