scrapy爬取天气预报
items.py
import scrapy
class WeatherItem(scrapy.Item):
    """One city's forecast scraped from tianqi.com.

    Every field holds the raw list of strings produced by
    ``Selector.extract()`` in the spider.
    """
    city = scrapy.Field()    # city name (page heading)
    date = scrapy.Field()    # forecast dates
    week = scrapy.Field()    # weekday labels
    img = scrapy.Field()     # weather-icon image src URLs
    state = scrapy.Field()   # weather condition text
    temmax = scrapy.Field()  # daily maximum temperatures
    temmin = scrapy.Field()  # daily minimum temperatures
    wind = scrapy.Field()    # wind direction / strength text
weathers.py (spiders/)
import scrapy
from weather.items import WeatherItem
class WeathersSpider(scrapy.Spider):
    """Scrape the forecast page of each configured city on tianqi.com.

    One start URL is built per city; ``parse`` yields one ``WeatherItem``
    per ``<div class="right">`` forecast panel found on the page.
    """
    name = 'weathers'
    allowed_domains = ['www.tianqi.com']
    # NOTE(review): this dict is never attached to any Request, so Scrapy's
    # default user agent is still sent — wire it up through
    # DEFAULT_REQUEST_HEADERS / custom_settings if the site needs it.
    header = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/39.0.2171.71 Safari/537.36'}
    citys = ['baoding', 'beijing', 'shijiazhuang']
    start_urls = ['https://www.tianqi.com/' + city for city in citys]

    def parse(self, response):
        """Yield a WeatherItem for every forecast panel in the response."""
        for panel in response.xpath('//div[@class="right"]'):
            item = WeatherItem()
            item['city'] = panel.xpath('.//div[@class="top"]//h1//text()').extract()
            item['date'] = panel.xpath('.//ul[@class="week"]//li//b//text()').extract()
            item['week'] = panel.xpath('.//ul[@class="week"]//li//span//text()').extract()
            item['img'] = panel.xpath('.//ul[@class="week"]//li//img/@src').extract()
            item['state'] = panel.xpath('.//ul[@class="txt txt2"]//li//text()').extract()
            item['temmax'] = panel.xpath('.//div[@class="zxt_shuju"]//ul//li//span//text()').extract()
            item['temmin'] = panel.xpath('.//div[@class="zxt_shuju"]//ul//li//b//text()').extract()
            item['wind'] = panel.xpath('.//ul[@class="txt"]//li//text()').extract()
            # yield, not return: a return inside the loop would exit after
            # the first panel and silently drop any further matches.
            yield item
pipelines.py
import os.path
import urllib
import urllib.request
class WeatherPipeline(object):
    """Append every item to ``weathers.txt`` and download its weather icon.

    The icon is fetched only once: if a file with the icon's basename
    already exists in the working directory the download is skipped.
    """

    def process_item(self, item, spider):
        # utf-8 so the Chinese field values are written portably.
        with open('weathers.txt', 'a+', encoding='utf-8') as fp:
            fp.write(str(item['city']) + '\n\n\t')
            fp.write(str(item['date']) + '\n\n\t')
            fp.write(str(item['week']) + '\n\n\t')
            fp.write(str(item['img']) + '\n\n\t')
            # item['img'] is a LIST of src strings; basename(str(list))
            # would yield garbage like "icon.png']" — take the first URL.
            img_list = item['img']
            img_url = img_list[0] if img_list else ''
            imgname = os.path.basename(img_url)
            fp.write(str(imgname) + '\n\n\t')
            if imgname and not os.path.exists(imgname):
                # Separate handle name: reusing `fp` here would shadow the
                # text file and make the writes below hit a closed file.
                with open(imgname, 'wb') as imgfile:
                    # NOTE(review): src values may be protocol-relative
                    # ('//host/...'); prepend 'https:' if the live markup
                    # confirms that — can't tell from here.
                    response = urllib.request.urlopen(img_url)
                    imgfile.write(response.read())
            fp.write(str(item['state']) + '\n\n\t')
            fp.write(str(item['temmax']) + '\n\n\t')
            fp.write(str(item['temmin']) + '\n\n\t')
            fp.write(str(item['wind']) + '\n\n\t')
        return item
import json
class WeatherPipeline2(object):
    """Append each item to ``weathers.json`` as one JSON object per record."""

    def open_spider(self, spider):
        # encoding='utf-8' is required: ensure_ascii=False below emits raw
        # Chinese text, which crashes on non-UTF-8 default locales (Windows).
        self.filename = open('weathers.json', 'a', encoding='utf-8')

    def process_item(self, item, spider):
        content = json.dumps(dict(item), ensure_ascii=False) + '\n\n\t\t'
        self.filename.write(content)
        return item

    def close_spider(self, spider):
        self.filename.close()
# Disabled draft of a MySQL export pipeline, parked inside a triple-quoted
# string so it never executes. It is incomplete: the connection has no
# password/database, no cursor is opened and no INSERT is issued, and it is
# not registered in ITEM_PIPELINES. Kept for reference only.
'''
import MySQLdb
class WeatherPipeline3(object):
def process_item(self, item, spider):
city=item['city'].encode('utf8')
date=item['date'].encode('utf8')
week=item['week'].encode('utf8')
img=os.path.basename(str(item['img']))
state=item['state'].encode('utf8')
temmax=item['temmax'].encode('utf8')
temmin=item['temmin'].encode('utf8')
wind=item['wind'].encode('utf8')
conn=MySQLdb.connect(
host='localhost',
port=3306,
user=''
)
'''
settings.py
# Scrapy project settings for the weather crawler.
BOT_NAME = 'weather'
SPIDER_MODULES = ['weather.spiders']
NEWSPIDER_MODULE = 'weather.spiders'
# Crawl responsibly by identifying yourself (and your website) on the user-agent
#USER_AGENT = 'weather (+http://www.yourdomain.com)'
# Obey robots.txt rules
ROBOTSTXT_OBEY = True
# Pipelines run in ascending priority order: the text/icon dump (300)
# first, then the JSON dump (400).
ITEM_PIPELINES = {
    'weather.pipelines.WeatherPipeline': 300,
    'weather.pipelines.WeatherPipeline2': 400
}