scrapy startproject dangdang
scrapy genspider -t basic cao dangdang.com
# -*- coding: utf-8 -*-

# Define here the models for your scraped items
#
# See documentation in:
# http://doc.scrapy.org/en/latest/topics/items.html

import scrapy
class DangdangItem(scrapy.Item):
    """Item holding the product data scraped from a dangdang.com listing.

    Each field is a plain ``scrapy.Field()`` with no metadata. The spider
    in this file assigns the result of ``.extract()`` to every field, so
    each one carries a list of strings rather than a single value.
    """

    # Product name.
    name = scrapy.Field()
    # Product price.
    price = scrapy.Field()
    # Product link URL.
    link = scrapy.Field()
# -*- coding: utf-8 -*-
import scrapy
from dangdang.items import DangdangItem
from scrapy.http import Request
import json
class CaoSpider(scrapy.Spider):
    """Spider that scrapes product listings from one dangdang.com category.

    Crawling starts at page 1 of category ``cid4008149``; ``parse`` then
    schedules pages 1 through 9 of the same category.
    """

    name = "cao"
    allowed_domains = ["dangdang.com"]
    start_urls = (
        'http://category.dangdang.com/pg1-cid4008149.html',
    )

    def parse(self, response):
        """Extract the products on one listing page and queue further pages.

        Args:
            response: The downloaded listing page; only its ``xpath``
                method is used here.

        Yields:
            One ``DangdangItem`` whose ``name``, ``price`` and ``link``
            fields each hold the list of strings matched on the page,
            followed by a ``Request`` for each of pages 1-9 with this
            method as the callback.
        """
        item = DangdangItem()
        item['name'] = response.xpath(
            "//p[@class='name']/a[@name='itemlist-title']/text()"
        ).extract()
        item['price'] = response.xpath(
            "//p[@class='price']/span[@class='price_n']/text()"
        ).extract()
        item['link'] = response.xpath('//a[@class="pic"]/@href').extract()
        # Hand the item back once extraction is complete.
        yield item

        # NOTE(review): every parsed page re-requests pages 1-9; this relies
        # on Scrapy's default duplicate-request filtering to avoid refetching
        # -- confirm the project has not disabled it.
        for i in range(1, 10):
            url = "http://category.dangdang.com/pg" + str(i) + "-cid4008149.html"
            yield Request(url, callback=self.parse)

        # Pair the three lists with zip instead of indexing a fixed
        # range(100): a page with fewer than 100 matches (or lists of
        # unequal length) would otherwise raise IndexError. The original
        # cap of 100 printed rows is kept.
        rows = list(zip(item['name'], item['price'], item['link']))[:100]
        for name, price, link in rows:
            print(name + '----' + price + '---' + link)
scrapy startproject dangdangscrapy genspider -t basic cao dangdang.com# -*- coding: utf-8 -*-# Define here the models for your scraped items## See documentation in:# http://doc.scrapy.org/en/latest