运行scrapy 中的spider
新建run.py
# run.py -- start the "tq" spider programmatically instead of from the shell.
from scrapy import cmdline

# Equivalent to typing `scrapy crawl tq` on the command line;
# cmdline.execute expects the argv-style list, hence the split.
cmdline.execute("scrapy crawl tq".split(" "))
判断获取元素的类型
print(type(sevenday(对象)))
获取对象中所有数据:对象.getall();获取其中第一个元素:对象.get()
组装 Item(数据容器):
在items中组装字段
import scrapy

from ..items import TqybItem


class TqSpider(scrapy.Spider):
    """Scrape the 7-day-forecast <h1> headings from weather.com.cn.

    start_urls points at the Beijing page (city code 101010100); each
    extracted heading is wrapped in a TqybItem and yielded to the
    item pipelines.
    """

    name = 'tq'
    allowed_domains = ['weather.com.cn']
    start_urls = ['http://www.weather.com.cn/weather/101010100.shtml']

    def parse(self, response):
        """Yield one TqybItem per day heading found on the page.

        :param response: the downloaded page for a start URL.
        """
        # getall() returns a list[str] of every matching text node;
        # get() would return only the first one.
        sevendays = response.xpath(
            '//div[@class="con today clearfix"]//div[@id="7d"]//h1/text()'
        ).getall()
        for sevenday in sevendays:
            # Each yielded item is handed on to the configured pipelines.
            yield TqybItem(day=sevenday)
yield item 发给管道