安装scrapy
pip install Scrapy
本教程以爬取中国天气网的天气信息为例。
1.创建scrapy项目命令
scrapy startproject 项目名
2.创建spider命令(在项目目录内执行,生成的爬虫文件位于项目的 spiders 目录下)
scrapy genspider spider文件名 访问的域名
3.定义Item(在项目的 items 文件中声明要抓取的字段,本例的文件为 items02.py)
# -*- coding: utf-8 -*-
# Define here the models for your scraped items
#
# See documentation in:
# https://docs.scrapy.org/en/latest/topics/items.html
import scrapy
class Demo03Item2(scrapy.Item):
    """Scrapy item describing one scraped weather record.

    Every attribute is a plain ``scrapy.Field()`` with no metadata, so the
    item behaves like a dict restricted to the keys declared below.
    Units and value formats are not defined here; they depend on what the
    spider stores (NOTE(review): confirm against the spider's parse code).
    """

    # Time of the observation.
    date = scrapy.Field()
    # Weather condition.
    state = scrapy.Field()
    # Temperature.
    temp = scrapy.Field()
    # Wind direction.
    wind = scrapy.Field()
    # Relative humidity.
    humidity = scrapy.Field()
    # Air quality.
    air = scrapy.Field()
    # Precipitation amount.
    amount = scrapy.Field()
    # Wind speed.
    speed = scrapy.Field()
    # City name.
    city = scrapy.Field()
    # Area code of the city.
    areacode = scrapy.Field()
4.编写自己的爬虫文件
打开刚刚创建的爬虫文件,在其中编写爬取逻辑。
下面是我写的样例:
import scrapy
from datashape import null
import pandas as pd
from demo03.items02 import Demo03Item2
import datetime
import json
import os
class WeatherSpider(scrapy.Spider):
name = "weather3"
#得到城市数据
# def fileload(filename='weather.csv'):
# csvfile = open(filename,encoding='GBK')
# data = csv.reader(csvfile)
# dataset = []
# for line in data:
# dataset.