淘车网—>scrapy
目录
步骤
(一) 设置settings.py
import hashlib
# Headers sent with every request.
_DEFAULT_HEADERS = {
    'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
    'Accept-Language': 'en',
    'user-agent': 'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/61.0.3163.79 Safari/537.36 Maxthon/5.2.6.1000',
}

# Item pipelines and their order values.
_PIPELINES = {
    'taoche.pipelines.TaochePipeline': 300,
}

# Scrapy setting overrides; the spider assigns this dict to `custom_settings`.
CUSTOM_SETTINGS = {
    # robots.txt handling
    'ROBOTSTXT_OBEY': False,
    'DEFAULT_REQUEST_HEADERS': _DEFAULT_HEADERS,
    # Downloader middlewares (left disabled)
    # 'DOWNLOADER_MIDDLEWARES': {
    #     'hupu.middlewares.HupuDownloaderMiddleware': 543,
    # },
    'ITEM_PIPELINES': _PIPELINES,
    # MongoDB connection URI
    'MONGO_URI': 'localhost',
    # MongoDB database name
    'MONGO_DATABASE': 'taoche',
}
def get_md5(value):
    """Return the hexadecimal MD5 digest of ``value``.

    Args:
        value: Text to hash (encoded as UTF-8 first) or raw ``bytes``.

    Returns:
        The 32-character lowercase hex digest string.

    Raises:
        TypeError: If ``value`` is neither ``str`` nor ``bytes``.
    """
    # bytes(x, encoding=...) rejects bytes input, so only text is encoded.
    data = value if isinstance(value, bytes) else bytes(value, encoding='utf-8')
    return hashlib.md5(data).hexdigest()
def update_to_mongo(db, collectionName, id, url, item):
    """Upsert ``item`` into ``db[collectionName]``, keyed by the MD5 of its URL.

    Args:
        db: Object indexable by collection name whose collections expose
            ``update_one`` (the pipeline passes a pymongo database).
        collectionName: Name of the target collection.
        id: Name of the item field that holds the document key.
        url: Name of the item field whose value is hashed to build the key.
            When falsy, ``item[id]`` must already be set.
        item: Mapping of scraped fields; the computed key is written back
            to ``item[id]``.

    Raises:
        KeyError: If the item lacks the ``url`` field (or the ``id`` field
            when ``url`` is falsy).
    """
    if url:
        item[id] = get_md5(item[url])
    # Collection.update() is the legacy API that PyMongo 4 removed;
    # update_one(..., upsert=True) performs the same insert-or-update.
    db[collectionName].update_one(
        {id: item[id]},
        {'$set': dict(item)},
        upsert=True,
    )
    print("正在存储..........")
def add_page(m):
    """Build the next-page query string; meant as a ``re.sub`` replacement.

    Args:
        m: Match object whose group 1 holds the current page number as digits.

    Returns:
        ``'?page=<current page + 1>'``.
    """
    return f'?page={int(m.group(1)) + 1}'
(二)导入url
# City codes used as the subdomain of each listing URL
# (https://{city}.taoche.com/...). Each code appears once: the list was
# previously pasted twice back to back, which doubled every start URL.
CITY_CODE = [
    'hefei', 'anqing', 'bengbu', 'chaohu', 'chizhou', 'fuyang', 'huainan', 'luan', 'maanshan', 'tongling', 'wuhu',
    'xuancheng', 'chuzhou', 'sz', 'bozhou', 'huaibei', 'huangshan', 'beijing', 'chongqing', 'fuzhou', 'xiamen',
    'longyan', 'zhangzhou', 'putian', 'quanzhou', 'nanping', 'ningde', 'sanming', 'guangzhou', 'shenzhen', 'zhuhai',
    'dongguan', 'zhongshan', 'shantou', 'shaoguan', 'zhaoqing', 'maoming', 'foshan', 'huizhou', 'jiangmen', 'qingyuan',
    'chaozhou', 'zhanjiang', 'meizhou', 'jieyang', 'yunfu', 'yangjiang', 'heyuan', 'shanwei', 'nanning', 'liuzhou',
    'guilin', 'beihai', 'baise', 'hezhou', 'hechi', 'guigang', 'yulin', 'qinzhou', 'wuzhou', 'fangchenggang', 'laibin',
    'chongzuo', 'guiyang', 'zunyi', 'anshun', 'liupanshui', 'tongrendiqu', 'qiandongnan', 'qiannan', 'bijiediqu',
    'qianxinan', 'lanzhou', 'dingxi', 'pingliang', 'jiuquan', 'qingyang', 'baiyin', 'zhangye', 'wuwei', 'tianshui',
    'jiayuguan', 'jinchang', 'linxia', 'longnan', 'gannan', 'haikou', 'sanya', 'sanshashi', 'wuhan', 'shiyan',
    'xiangfan', 'suizhou', 'yichang', 'huangshi', 'jingmen', 'jingzhou', 'ezhou', 'xianning', 'xiaogan', 'huanggang',
    'enshi', 'xiantao', 'tianmen', 'qianjiang', 'hubeizhixiaxian', 'changsha', 'chenzhou', 'changde', 'hengyang',
    'huaihua', 'loudi', 'zhuzhou', 'yueyang', 'xiangtan', 'shaoyang', 'yongzhou', 'yiyang', 'zhangjiajie', 'xiangxi',
    'zhengzhou', 'luoyang', 'zhoukou', 'xinyang', 'xinxiang', 'shangqiu', 'sanmenxia', 'puyang', 'nanyang', 'luohe',
    'jiaozuo', 'kaifeng', 'anyang', 'hebi', 'pingdingshan', 'zhumadian', 'xuchang', 'jiyuan', 'henanzhixiaxian',
    'shijiazhuang', 'tangshan', 'xingtai', 'qinhuangdao', 'langfang', 'handan', 'hengshui', 'cangzhou', 'baoding',
    'zhangjiakou', 'chengde', 'haerbin', 'daqing', 'qiqihaer', 'hegang', 'jiamusi', 'mudanjiang', 'jixi', 'qitaihe',
    'yc', 'heihe', 'shuangyashan', 'suihua', 'daxinganlingdiqu', 'nanjing', 'suzhou', 'wuxi', 'changzhou', 'huaian',
    'lianyungang', 'nantong', 'yancheng', 'yangzhou', 'zhenjiang', 'taizhou', 'xuzhou', 'suqian', 'nanchang',
    'shangrao', 'pingxiang', 'xinyu', 'yichun', 'jiujiang', 'ganzhou', 'jian', 'jingdezhen', 'jxfz', 'yingtan',
    'changchun', 'jilin', 'tonghua', 'liaoyuan', 'songyuan', 'yanbian', 'siping', 'baishan', 'baicheng', 'shenyang',
    'dalian', 'dandong', 'fushun', 'fuxin', 'huludao', 'chaoyang', 'benxi', 'anshan', 'jinzhou', 'liaoyang', 'yingkou',
    'panjin', 'tieling', 'huhehaote', 'baotou', 'chifeng', 'tongliao', 'wuhai', 'eerduosi', 'bayannaoer', 'wulanchabu',
    'xilinguolemeng', 'hulunbeier', 'xinganmeng', 'alashanmeng', 'yinchuan', 'zhongwei', 'wuzhong', 'guyuan',
    'shizuishan', 'xining', 'haibei', 'huangnan', 'guoluo', 'yushu', 'haixi', 'haidongdiqu', 'hainanzangzuzizhizho',
    'xian', 'xianyang', 'weinan', 'yl', 'baoji', 'hanzhong', 'yanan', 'tongchuan', 'shangluo', 'ankang', 'shanghai',
    'taiyuan', 'datong', 'jincheng', 'linfen', 'changzhi', 'yuncheng', 'xinzhou', 'shuozhou', 'lvliang', 'jinzhong',
    'yangquan', 'chengdu', 'mianyang', 'suining', 'panzhihua', 'yibin', 'zigong', 'ziyang', 'deyang', 'leshan',
    'nanchong', 'meishan', 'bazhong', 'luzhou', 'neijiang', 'dazhou', 'yaan', 'guangyuan', 'guangan', 'aba', 'ganzi',
    'liangshan', 'jinan', 'dezhou', 'qingdao', 'yantai', 'weihai', 'weifang', 'taian', 'zaozhuang', 'zibo', 'dongying',
    'heze', 'binzhou', 'liaocheng', 'linyi', 'jining', 'rizhao', 'laiwu', 'tianjin', 'wulumuqi', 'kelamayi', 'bazhou',
    'yili', 'kashidiqu', 'akesudiqu', 'hetiandiqu', 'tachengdiqu', 'tulufandiqu', 'hamidiqu', 'aletaidiqu',
    'xinjiangkezhou', 'changji', 'shihezi', 'xinjiangzhixiaxian', 'lasa', 'rikazediqu', 'shannan', 'kunming', 'qujing',
    'baoshan', 'xishuangbanna', 'honghe', 'dali', 'yuxi', 'lincang', 'wenshan', 'zhaotong', 'lijiang', 'dehong',
    'nujiang', 'diqing', 'puer', 'chuxiong', 'hangzhou', 'ningbo', 'wenzhou', 'jiaxing', 'jinhua', 'lishui', 'huzhou',
    'quzhou', 'tz', 'shaoxing', 'yiwu', 'zhoushan',
]
# Car codes used as the path segment of each listing URL
# (https://{city}.taoche.com/{car}/?page=1).
CAR_CODE_LIST = [
'abt', 'acschnitzer-319', 'alfaromeo', 'always', 'iconiqmotors', 'alpina', 'audi', 'apex',
'arcfox-289', 'astonmartin', 'aspark', 'honda', 'bmw', 'mercedesbenz', 'buick', 'peugeot', 'bj',
'barbus', 'bac', 'byton', 'baolong', 'porsche', 'proton', 'borgward', 'bjqc', 'qxev', 'changheauto',
'daoda-282', 'beiqihuansu', 'shenbao', 'ww', 'beiqixinnengyuan', 'beijingjeep', 'besturn', 'byvinauto',
'bentley', 'pininfarina', 'bisuqiche-263', 'bydauto', 'xingchi', 'bordrin', 'bollingermotors',
'bugatti', 'caterham', 'changankuayue-283', 'casyc', 'cajc', 'changanqingxingche-281', 'greatwall',
'speedauto', 'chenggongqiche', 'cupra', 'czinger', 'volkswagen', 'dorcen', 'dacia', 'dadiauto',
'daihatsu-355', 'dodge', 'dayu', 'dayun', 'dearcc', 'dongfeng-27', 'dongfengfengdu',
'dongfengfengguang', 'fs', 'fengxingauto', 'dongfengfukang', 'dongfengruitaite',
'dongfengxiaokang-205', 'southeastautomobile', 'donkervoort', 'ds', 'toyota', 'ford', 'ferrari',
'fiat', 'maple', 'fisker-369', 'foday', 'fuqiautomobile', 'fujianxinlongmaqichegufenyouxiangongsi',
'foton', 'humanhorizons', 'grove', 'gfgstyle', 'gmc-109', 'galue', 'gq', 'gonow', 'gacgroup',
'guangqihinomotors', 'gacne', 'qorosauto', 'sinogold', 'guojizhijun', 'nevs', 'gyon',
'hafeiautomobile', 'hafu-196', 'higer', 'haige1', 'hama', 'haimu', 'hummer', 'hanlong', 'hanteng',
'evergrandeneoenergy', 'chtc', 'hennessey', 'hispanosuiza', 'faw-hongqi', 'redstar', 'htyt', 'huakai',
'sgautomotive', 'sma', 'horki', 'huasong', 'huataiautomobile', 'shanghaihuizhong-45', 'hycan', 'icona',
'inkas', 'jac', 'jmc', 'jianglinglvjuche', 'jauger', 'jetta', 'genesis-357', 'jetour', 'geometry',
'geely', 'jinbei', 'jinchengautomobile', 'kinglongmotor', 'jlkc', 'jeep', 'joylongautomobile', 'traum',
'juntiansuv', 'iat-314', 'kaersen', 'cadillac', 'karry', 'skywell-342', 'kaiyi', 'karma-330',
'zhejiangkaersen', 'kawei', 'chrysler', 'koenigsegg', 'ktm', 'lada', 'ladaa', 'lamborghini',
'lancia-392', 'voyah', 'rolls-royce', 'levdeo', 'lexus', 'renult', 'renaultsamsungmotors', 'levc',
'lotus-146', 'cf', 'lifanmotors', 'lingbox', 'lynkco', 'suzuki', 'leapmotor', 'linktour', 'everus',
'lincoln', 'li', 'lordstownmotors', 'lucid', 'landwind', 'landrover', 'rolfhartge', 'lorinser',
'lotus', 'mahindra', 'maybach', 'mclaren', 'mansory', 'maserati', 'mazda', 'micro', 'mg-79', 'mini',
'morgancars', 'nanqixinyatu1', 'hozon', 'luxgen', 'opel', 'acura', 'ora', 'ol', 'pagani', 'pgo-379',
'polestar', 'puritalia', 'chautotechnology', 'qiaozhibadun-339', 'venucia', 'jdmc', 'isuzu', 'qyev',
'chery', 'kia', 'kangdi', 'nissan', 'rimac', 'rivian', 'roewe', 'ruf', 'ruichixinnengyuan', 'riich',
'saab', 'saleen', 'mitsubishi', 'sfmotors', 'maxus', 'shangqisaibao', 'shangqiyuejin',
'shanqitongjia1', 'sj', 'sceo', 'ssangyong', 'subaru', 'sol', 'skoda', 'ciimo', 'smart', 'srm', 'ssc',
'suda-399', 'sony', 'siwei', 'tking', 'tata', 'tesla', 'skywell-394', 'tianqimeiya', 'denza',
'troller', 'vantas', 'vegainnovations', 'vinfast', 'uaz', 'wey', 'eurise', 'vgv', 'weilaiqiche',
'rely', 'weltmeister', 'wiesmann', 'volvo', 'vauxhall', 'sgmw', 'isuzu-132', 'hyundai', 'xpeng',
'baojun-359', 'exeed', 'xinkaiauto', 'sitech', 'seat', 'chevrolet', 'citroen', 'ym', 'infiniti',
'enranger', 'yinlong', 'faw', 'yiqijiefang-409', 'iveco', 'jonwayautomobile', 'yuanchengqiche', 'yulu',
'yudo', 'yunqueqiche', 'yusheng-258', 'yutong', 'zenvo-347', 'zhidou', 'zhinuo', 'sinotruk',
'brillianceauto', 'zotyeauto', 'zxauto', 'polarsunautomobile'
]
(三)spiders组件
import re
import scrapy,socket
from .city import CITY_CODE,CAR_CODE_LIST
from ..my_settings import CUSTOM_SETTINGS,add_page
class TaocheScrapySpider(scrapy.Spider):
    """Crawl taoche.com listing pages for every city/car-code combination.

    Each listing page yields one plain ``dict`` per car and then schedules
    the following page; paging stops at the first page without listings.
    """

    name = 'taoche_scrapy'
    # Address the local hostname resolves to; stored on every item as 'ip'.
    myaddr = socket.gethostbyname(socket.gethostname())
    # allowed_domains = ['www']
    custom_settings = CUSTOM_SETTINGS
    # One first-page URL per (city, car code) pair.
    start_urls = [
        f'https://{city}.taoche.com/{car}/?page=1'
        for city in CITY_CODE
        for car in CAR_CODE_LIST
    ]

    def parse(self, response):
        """Extract the cars on one listing page and request the next page.

        Args:
            response: Downloaded listing page.

        Yields:
            A dict per car card, followed by a ``scrapy.Request`` for the
            next page when the current page contained listings.
        """
        li_list = response.xpath('//ul[@class="gongge_ul"]/li')
        if not li_list:
            # An empty page ends this listing; requesting further pages
            # would never terminate.
            return

        for li in li_list:
            info = li.xpath('.//div[@class="gongge_main"]/p/i/text()').extract()
            if len(info) < 3:
                # The card lacks the three info texts read below. Skip it
                # explicitly: a bare ``except`` around a ``yield`` would
                # also swallow GeneratorExit and hide real errors.
                self.logger.debug('Skipping incomplete listing on %s', response.url)
                continue
            yield {
                'car_title': li.xpath('.//div[@class="gongge_main"]/a/@title').extract_first(),
                'car_now_price': li.xpath('.//div[@class="price"]/i[1]/text()').extract_first(),
                'car_old_price': li.xpath('.//div[@class="price"]/i[2]/text()').extract_first(),
                'car_years': info[0],
                'car_licheng': info[1],
                'car_product_area': info[2].strip(),
                'ip': self.myaddr,
                'car_url': li.xpath('.//div[@class="gongge_main"]/a/@href').extract_first(),
            }

        # Request the next page.
        now_url = response.url
        next_url = re.sub(r'\?page=(\d+)', add_page, now_url)
        if next_url != now_url:
            # The URL only changes when it carries a ?page=N query.
            yield scrapy.Request(next_url, callback=self.parse, encoding='utf-8')
(四)items组件
# Define here the models for your scraped items
#
# See documentation in:
# https://docs.scrapy.org/en/latest/topics/items.html
import scrapy
class TaocheItem(scrapy.Item):
    """Fields of one scraped car listing."""

    # Title attribute of the listing link.
    car_title = scrapy.Field()
    # Text of the first / second <i> inside the card's price block.
    car_now_price = scrapy.Field()
    car_old_price = scrapy.Field()
    # First, second and third info texts of the card, in page order.
    car_years = scrapy.Field()
    car_licheng = scrapy.Field()  # presumably mileage ("licheng"); confirm
    car_product_area = scrapy.Field()
    myaddr = scrapy.Field()
    # The spider stores the crawler host address under 'ip', so that key
    # must be declared for the item to accept it.
    ip = scrapy.Field()
    # href of the listing link.
    car_url = scrapy.Field()
    # MD5 of car_url, filled in by the pipeline before the upsert.
    car_id = scrapy.Field()
(五)管道组件
# Define your item pipelines here
#
# Don't forget to add your pipeline to the ITEM_PIPELINES setting
# See: https://docs.scrapy.org/en/latest/topics/item-pipeline.html
# useful for handling different item types with a single interface
import pymongo
from itemadapter import ItemAdapter
from .my_settings import update_to_mongo
class TaochePipeline:
    """Item pipeline that upserts every scraped car into MongoDB."""

    def __init__(self, mongo_uri, mongo_db):
        """Remember the connection parameters; no connection is opened yet.

        Args:
            mongo_uri: Value handed to ``pymongo.MongoClient``.
            mongo_db: Name of the database to write to.
        """
        self.mongo_uri = mongo_uri
        self.mongo_db = mongo_db
        # Set by open_spider(); None until a connection exists.
        self.client = None
        self.db = None

    @classmethod
    def from_crawler(cls, crawler):
        """Build the pipeline from the crawler's settings.

        Reads ``MONGO_URI`` and ``MONGO_DATABASE`` (default ``'items'``).
        """
        return cls(
            mongo_uri=crawler.settings.get('MONGO_URI'),
            mongo_db=crawler.settings.get('MONGO_DATABASE', 'items'),
        )

    def open_spider(self, spider):
        """Open the MongoDB client and select the database."""
        self.client = pymongo.MongoClient(self.mongo_uri)
        self.db = self.client[self.mongo_db]

    def close_spider(self, spider):
        """Close the MongoDB client if one was opened."""
        if self.client is not None:
            self.client.close()

    def process_item(self, item, spider):
        """Upsert ``item`` into the ``car`` collection and pass it on.

        The document key ``car_id`` is derived from ``car_url`` by
        ``update_to_mongo``.
        """
        update_to_mongo(self.db, 'car', 'car_id', 'car_url', item)
        return item
(六)调试
scrapy crawl taoche_scrapy
(七)效果图
淘车网(scrapy-redis)
步骤
在上面 scrapy 版本的基础上，这里只需要修改以下几个地方即可
settings
import hashlib
# Headers sent with every request.
_DEFAULT_HEADERS = {
    'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
    'Accept-Language': 'en',
    'user-agent': 'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/61.0.3163.79 Safari/537.36 Maxthon/5.2.6.1000',
}

# Item pipelines and their order values.
_PIPELINES = {
    'taoche.pipelines.TaochePipeline': 300,
}

# Scrapy setting overrides; the spider assigns this dict to `custom_settings`.
CUSTOM_SETTINGS = {
    # robots.txt handling
    'ROBOTSTXT_OBEY': False,
    'DEFAULT_REQUEST_HEADERS': _DEFAULT_HEADERS,
    # Downloader middlewares (left disabled)
    # 'DOWNLOADER_MIDDLEWARES': {
    #     'hupu.middlewares.HupuDownloaderMiddleware': 543,
    # },
    'ITEM_PIPELINES': _PIPELINES,
    # MongoDB connection URI
    'MONGO_URI': 'localhost',
    # MongoDB database name
    'MONGO_DATABASE': 'taoche',
    # --- added for scrapy-redis ---
    # scrapy-redis scheduler
    'SCHEDULER': 'scrapy_redis.scheduler.Scheduler',
    # URL de-duplication
    'DUPEFILTER_CLASS': 'scrapy_redis.dupefilter.RFPDupeFilter',
    # Priority queue
    'SCHEDULER_QUEUE_CLASS': 'scrapy_redis.queue.PriorityQueue',
    # Redis port and host
    'REDIS_PORT': 6379,
    'REDIS_HOST': 'localhost',
}
def get_md5(value):
    """Return the hexadecimal MD5 digest of ``value``.

    Args:
        value: Text to hash (encoded as UTF-8 first) or raw ``bytes``.

    Returns:
        The 32-character lowercase hex digest string.

    Raises:
        TypeError: If ``value`` is neither ``str`` nor ``bytes``.
    """
    # bytes(x, encoding=...) rejects bytes input, so only text is encoded.
    data = value if isinstance(value, bytes) else bytes(value, encoding='utf-8')
    return hashlib.md5(data).hexdigest()
def update_to_mongo(db, collectionName, id, url, item):
    """Upsert ``item`` into ``db[collectionName]``, keyed by the MD5 of its URL.

    Args:
        db: Object indexable by collection name whose collections expose
            ``update_one`` (the pipeline passes a pymongo database).
        collectionName: Name of the target collection.
        id: Name of the item field that holds the document key.
        url: Name of the item field whose value is hashed to build the key.
            When falsy, ``item[id]`` must already be set.
        item: Mapping of scraped fields; the computed key is written back
            to ``item[id]``.

    Raises:
        KeyError: If the item lacks the ``url`` field (or the ``id`` field
            when ``url`` is falsy).
    """
    if url:
        item[id] = get_md5(item[url])
    # Collection.update() is the legacy API that PyMongo 4 removed;
    # update_one(..., upsert=True) performs the same insert-or-update.
    db[collectionName].update_one(
        {id: item[id]},
        {'$set': dict(item)},
        upsert=True,
    )
    print("正在存储..........")
def add_page(m):
    """Build the next-page query string; meant as a ``re.sub`` replacement.

    Args:
        m: Match object whose group 1 holds the current page number as digits.

    Returns:
        ``'?page=<current page + 1>'``.
    """
    return f'?page={int(m.group(1)) + 1}'
初始化
import redis
from .city import *
# Seed the Redis list that the spider reads its start URLs from
# (the spider's redis_key is 'taoche:start_urls').
redis_ = redis.Redis()
# dict.fromkeys() drops repeated codes while keeping first-seen order, so
# no start URL is queued more than once.
for city in dict.fromkeys(CITY_CODE):
    for car in dict.fromkeys(CAR_CODE_LIST):
        url = f'https://{city}.taoche.com/{car}/?page=1'
        redis_.lpush('taoche:start_urls', url)
spiders组件
import re
import scrapy,socket
from scrapy_redis import spiders
from .city import CITY_CODE,CAR_CODE_LIST
from ..my_settings import CUSTOM_SETTINGS,add_page
class TaocheScrapySpider(spiders.RedisSpider):
    """Crawl taoche.com listing pages whose start URLs come from Redis.

    Each listing page yields one plain ``dict`` per car and then schedules
    the following page; paging stops at the first page without listings.
    """

    name = 'taoche_scrapy'
    # Address the local hostname resolves to; stored on every item as 'ip'.
    myaddr = socket.gethostbyname(socket.gethostname())
    custom_settings = CUSTOM_SETTINGS
    # Redis list the start URLs are read from.
    redis_key = 'taoche:start_urls'

    def parse(self, response):
        """Extract the cars on one listing page and request the next page.

        Args:
            response: Downloaded listing page.

        Yields:
            A dict per car card, followed by a ``scrapy.Request`` for the
            next page when the current page contained listings.
        """
        li_list = response.xpath('//ul[@class="gongge_ul"]/li')
        if not li_list:
            # An empty page ends this listing; requesting further pages
            # would never terminate.
            return

        for li in li_list:
            info = li.xpath('.//div[@class="gongge_main"]/p/i/text()').extract()
            if len(info) < 3:
                # The card lacks the three info texts read below. Skip it
                # explicitly: a bare ``except`` around a ``yield`` would
                # also swallow GeneratorExit and hide real errors.
                self.logger.debug('Skipping incomplete listing on %s', response.url)
                continue
            yield {
                'car_title': li.xpath('.//div[@class="gongge_main"]/a/@title').extract_first(),
                'car_now_price': li.xpath('.//div[@class="price"]/i[1]/text()').extract_first(),
                'car_old_price': li.xpath('.//div[@class="price"]/i[2]/text()').extract_first(),
                'car_years': info[0],
                'car_licheng': info[1],
                'car_product_area': info[2].strip(),
                'ip': self.myaddr,
                'car_url': li.xpath('.//div[@class="gongge_main"]/a/@href').extract_first(),
            }

        # Request the next page.
        now_url = response.url
        next_url = re.sub(r'\?page=(\d+)', add_page, now_url)
        if next_url != now_url:
            # The URL only changes when it carries a ?page=N query.
            yield scrapy.Request(next_url, callback=self.parse, encoding='utf-8')
-
关注微信公众号【爱上开源】。该公众号会分享作者在网上找到的有趣的开源项目，并把使用过程写成文章呈现给读者；公众号还会为读者提供爬虫和部分计算机资源。如果读者需要什么资源，可以私信作者，作者会尽力查找（只要不是涉嫌违法的资源即可）。