import scrapy
import json
# Run with: scrapy crawl qiubai
class QiubaiSpider(scrapy.Spider):
    """Spider that saves the text of each matched ``div`` as a JSON array.

    Every ``div`` directly under the element with id ``cnblogs_post_body``
    contributes one string (all of its descendant text nodes joined
    together). The collected strings are written to ``./qiubai.txt``.

    NOTE(review): the XPath targets ``cnblogs_post_body`` while
    ``start_urls`` points at baidu.com -- confirm the intended start URL.
    """

    name = 'qiubai'
    start_urls = ['https://www.baidu.com']

    def parse(self, response):
        """Extract per-``div`` text from *response* and dump it to disk.

        Args:
            response: The response object for a fetched start URL; only its
                ``xpath`` method is used here.

        Side effects:
            Prints each extracted string to stdout and overwrites
            ``./qiubai.txt`` with a UTF-8 JSON array of those strings
            (non-ASCII characters are written verbatim, not escaped).
        """
        texts = []
        for node in response.xpath('//*[@id="cnblogs_post_body"]/div'):
            # Join all descendant text nodes into a single string per div.
            text = ''.join(node.xpath('.//text()').extract())
            print(text)
            texts.append(text)

        # Use a context manager so the file is flushed and closed even if
        # serialization raises; a bare open() would leave the handle open.
        with open('./qiubai.txt', 'w', encoding='utf-8') as fp:
            json.dump(texts, fp, ensure_ascii=False)
scrapy爬取文件
最新推荐文章于 2023-10-01 15:28:48 发布