网页
#创建项目
$ scrapy startproject mypjt
#基于basic模板创建名为xxx的爬虫文件
$ scrapy genspider -t basic xxx sina.com.cn
html格式
class CaoItem(scrapy.Item):
    """Scrapy item declaring the four fields collected per scraped record.

    Every attribute is a plain ``scrapy.Field()`` with no metadata.
    NOTE(review): the meaning of each field is not shown here; the names
    suggest URL-related data (name, key, ..., address) -- TODO confirm.
    """

    # define the fields for your item here like:
    # name = scrapy.Field()
    urlname = scrapy.Field()
    urlkey = scrapy.Field()
    urlcr = scrapy.Field()
    urladd = scrapy.Field()
# 可以从命令行指定输入的地址
class AbcSpider(scrapy.Spider):
name = 'ab