1.下载一个csv文件:http://yum.iqianyue.com/weisuenbook/pyspd/part12/mydata.csv
2.创建项目mycsv
D:\myfirstspjt>scrapy startproject mycsv
3.修改items文件:创建name用来存储名字信息,sex用来存储性别
# -*- coding: utf-8 -*-
# Define here the models for your scraped items
#
# See documentation in:
# http://doc.scrapy.org/en/latest/topics/items.html
import scrapy
class MycsvItem(scrapy.Item):
    """Item holding one row of the CSV feed.

    Fields:
        name: the person's name (CSV column 'name').
        sex:  the person's sex  (CSV column 'sex').
    """
    name = scrapy.Field()
    sex = scrapy.Field()
    # NOTE: the original ended with a redundant `pass`; it is unnecessary
    # once the class body contains real field definitions.
4.在cmd中列出可用爬虫模板
scrapy genspider -l
5.创建csvfeed爬虫模板,处理csv文件
scrapy genspider -t csvfeed mycsvspider iqianyue.com
6.更改文件mycsvspider.py
# -*- coding: utf-8 -*-
from scrapy.spiders import CSVFeedSpider
from mycsv.items import MycsvItem
class MycsvspiderSpider(CSVFeedSpider):
    """CSVFeedSpider that downloads a remote CSV file and yields one
    MycsvItem per data row.

    The feed at `start_urls` has the columns name, sex, addr, email,
    separated by commas; only name and sex are kept on the item.
    """
    name = 'mycsvspider'
    allowed_domains = ['iqianyue.com']
    start_urls = ['http://yum.iqianyue.com/weisuenbook/pyspd/part12/mydata.csv']
    # Column headers of the CSV feed (maps positions to row-dict keys).
    headers = ['name', 'sex', 'addr', 'email']
    # Field delimiter used by the feed.
    delimiter = ','

    def parse_row(self, response, row):
        """Convert one parsed CSV row (a dict keyed by `headers`) into a
        MycsvItem, echoing the values for debugging.

        BUGFIX: the original called row['name'].encode() / row['sex'].encode(),
        which produces `bytes`; under Python 3 printing those shows
        b'\\xe5...' escapes instead of the actual (Chinese) text. Keep the
        values as `str` so they store and display correctly.
        """
        i = MycsvItem()
        i['name'] = row['name']
        i['sex'] = row['sex']
        print("名字是:")
        print(i['name'])
        print("性别是:")
        print(i['sex'])
        print("-------------------------------")
        return i
7.cmd执行文件
scrapy crawl mycsvspider --nolog