1.数据解析
import scrapy
class QiubaiSpider(scrapy.Spider):
    """Spider that prints the short content of each review on the start page.

    The start page is fetched by Scrapy and handed to :meth:`parse`, which
    walks the review containers and prints the extracted markup of each
    review's ``short-content`` block.
    """

    name = "qiubai"
    # Usually left commented out:
    # allowed_domains = ["www.xxx.com"]
    start_urls = ["https://movie.douban.com/review/best/"]

    def parse(self, response):
        """Print the short-content markup of every review in *response*.

        Args:
            response: The downloaded page; must support ``.xpath()``.
        """
        # NOTE: @class is compared as an exact string, so the trailing space
        # in "review-list chart " is significant.
        div_list = response.xpath('//div[@class="review-list chart "]/div')
        for div in div_list:
            # The leading "." makes the query relative to the current review
            # node. A bare "//" would search from the document root and return
            # every short-content div on the page for each iteration.
            # extract() pulls out the strings held in the selectors' data.
            comment = div.xpath('.//div[@class="short-content"]').extract()
            # Join the list of extracted strings into a single string.
            comment = ''.join(comment)
            print(comment)