写进文件中:
import scrapy
class SpellList(scrapy.Spider):
    """Spider that scrapes the spell list page and appends each entry to a text file.

    For every ``li`` found under the ``#spellList`` element, the entry's id,
    name and image link are appended to ``技能.txt``.
    """

    name = "SpellList"
    start_urls = [
        "https://pvp.qq.com/web201605/summoner.shtml",
    ]

    def parse(self, response):
        """Extract every spell entry from *response* and append it to the output file.

        Args:
            response: the downloaded page; only its ``css`` method is used.

        Entries missing an id, image or name are skipped with a warning
        instead of raising ``TypeError`` during string concatenation.
        """
        file_name = '技能.txt'  # scraped content is stored in this file

        records = []
        for spell in response.css("#spellList li"):
            spell_id = spell.css("li::attr(id)").extract_first()
            img = spell.css("img::attr(src)").extract_first()
            spell_name = spell.css("p::text").extract_first()

            # extract_first() returns None when nothing matches.
            if spell_id is None or img is None or spell_name is None:
                self.logger.warning(
                    "Skipping incomplete spell entry: id=%r name=%r img=%r",
                    spell_id, spell_name, img,
                )
                continue

            records.append("id:" + spell_id + "\n")        # id line
            records.append("name:" + spell_name + "\n")    # spell name line
            records.append("img:https:" + img + "\n")      # image link line

        # Only touch the file when there is something to write, as before.
        if records:
            # Append mode; the explicit UTF-8 encoding keeps the Chinese text
            # from being written in the platform default encoding (which
            # showed up as garbled characters when the file was opened).
            with open(file_name, "a+", encoding="utf-8") as f:
                f.writelines(records)
上面的代码执行完后,打开生成的文件会发现中文内容有乱码,需要将
f = open(fileName, "a+") # 追加写入文件
后改为:
f = open(fileName, "a+", encoding="utf-8") # 追加写入文件
用xpath实现:
# Select the <li> items inside the list. The previous "/ancestor::*" axis
# returned the elements enclosing the <ul>, not the entries within it.
spell_list = response.xpath("//ul[@id='spellList']//li")
logging.info(spell_list)
for spell in spell_list:
    # Paths must be relative to the current item: an XPath starting with "//"
    # is evaluated from the document root, so every iteration would return
    # the first match of the whole page.
    spell_id = spell.xpath("./@id").extract_first()
    # Lazy %-formatting also avoids a TypeError when a value is None.
    logging.info("id :%s", spell_id)
    img = spell.xpath(".//img/@src").extract_first()
    logging.info("img :%s", img)
    name = spell.xpath(".//p/text()").extract_first()
    logging.info("name :%s", name)