# Main crawler program
# -*- coding: utf-8 -*-
import scrapy
from scrapy import Request
from ..items import SpainweatherItem
class SpainSpider(scrapy.Spider):
# Crawl Spain's weather data for the year 2016
name = 'Spain'
allowed_domains = ['www.mundomanz.com']
start_urls = ['http://www.mundomanz.com/']
def parse(self, response):
year = 2016
# Determine the number of days in each month, then crawl the historical weather data for every day
for month in range(1, 13):
if month == 2:
for day in range(1, 29):
if day<10:
base_url = 'http://www.mundomanz.com/meteo_p/mairtext?' \
'year=' + str(year) + '&month=0' + str(month) + '&day=0' + str(
day) + '&n_days=1&action=display'
else:
base_url = 'http://www.mundomanz.com/meteo_p/mairtext?' \
'year=' + str(year) + '&month=0' + str(month) + '&day=' + str(
day) + '&n_days=1&action=display'
print base_url
yield Request(base_url,callback=self.saveData)
if month in [4,6,9,11]:
if month<10:
for day in range(1, 31):
if day < 10:
base_url = 'http://www.mundomanz.com/meteo_p/mairtext?' \
'year=' + str(year) + '&month=0' + str(month) + '&day=0' + str(
day) + '&n_days=1&action=display'
else:
base_url