爬取淘宝商品信息:昨天利用 Selenium 和 PhantomJS 爬取了淘宝页面,但速度方面有些欠缺,所以今天简单地利用 Scrapy 框架来实现同样的功能,并同样将结果保存到 MongoDB 数据库中。
import re

import pymongo
import scrapy

from taobao.items import TaobaoItem
class WeisuenSpider(scrapy.Spider):
# Spider name; Scrapy uses the `name` attribute to identify a spider.
name = 'taobao_'
# Taobao search-results URL. The `q` parameter is the URL-encoded keyword
# "女装" (women's clothing).
# NOTE(review): Scrapy's built-in attribute is `start_urls`; this custom
# `start_url` is presumably consumed by start_requests -- confirm.
start_url = "https://s.taobao.com/search?q=%E5%A5%B3%E8%A3%85&imgfile=&commend=all&ssid=s5-e&search_type=item&sourceId=tb.index&spm=a21bo.50862.201856-taobao-item.1&ie=utf8&initiative_id=staobaoz_20180309&ie=utf8&bcoffset=4&ntoffset=4&p4ppushleft=1%2C48"
# Class-level lists, i.e. shared by every instance of the spider.
# NOTE(review): how they are filled is not visible in this part of the file.
detail_urls = []
data = []
# MongoDB client for localhost:27017, created when the class body executes.
client = pymongo.MongoClient("localhost", 27017)
# `db` first refers to the `taobao` database and is then rebound to its `nvz`
# collection, so code that reads `db` afterwards gets the collection.
db = client.taobao
db = db.nvz
def start_requests(self):
for i in range(100):#爬100页数据