同我之前爬取的数据不一样
import scrapy
import json
# 执行命令:scrapy crawl qiubai
class QiubaiSpider(scrapy.Spider):
    """Spider that saves the text of every ``<p>`` found under ``div.content``.

    Run it with ``scrapy crawl qiubai``.
    """

    name = 'qiubai'
    start_urls = ['http://www.XXXXX']

    def parse(self, response):
        """Collect paragraph texts from *response* and dump them as JSON.

        Each ``<p>`` that is a child of a ``<div class="content">`` yields
        one string: all of its descendant text nodes joined together. Every
        string is printed, and the full list is written to ``./qiubai.txt``
        as a JSON array (UTF-8, non-ASCII characters kept as-is).

        Args:
            response: The response object Scrapy passes to the callback; it
                must provide an ``xpath()`` method.
        """
        # Example absolute path of one target node:
        # /html/body/div[4]/div[1]/div[1]/div[2]/p[1]
        # The expression must begin with a node test (``//div``); a bare
        # ``[@class="content"]/p`` predicate is not a valid XPath.
        paragraphs = response.xpath('//div[@class="content"]/p')

        id_list = []
        for paragraph in paragraphs:
            # A paragraph may contain nested tags, so gather every
            # descendant text node and glue the pieces back together.
            text = ''.join(paragraph.xpath('.//text()').extract())
            print(text)
            id_list.append(text)

        # The context manager guarantees the file is flushed and closed,
        # even if serialization raises.
        with open('./qiubai.txt', 'w', encoding='utf-8') as fp:
            json.dump(id_list, fp, ensure_ascii=False)
最主要的是应该写成:
//div[@class="content"]/p
我写成了:[@class="content"]/p
很久没爬数据忘记了