# Example: scraping a novel thread from Baidu Tieba ("小说吧")
import scrapy
import re
class QingrenSpider(scrapy.Spider):
name = 'qingren'
allowed_domains = ['tieba.baidu.com']
start_urls = ['https://tieba.baidu.com/p/5820130343']
f = open('走不出你.txt','a',encoding='utf-8')
def parse(self, response):
# 获取小说楼主的名字以及小说内容
div_list = response.xpath('//div[@class="l_post l_post_bright j_l_post clearfix "]')
# print(div_list)
# Approach 1: select the container tag, iterate over all child tags, and extract each tag's text:
for div in div_list:
author = div.xpath('.//div[@class="louzhubiaoshi_wrap"]').extract()
# print(author)
if len(author) != 0:
content_list = div.xpath('.//div[@class=