2020-11-07

利用scrapy框架把数据存入到文本,json,csv,mysql等

  1. 存入到文本中
class TextPipeline(object):
    """Scrapy item pipeline that appends each item, one dict per line, to ./sun.txt."""

    def open_spider(self, spider):
        # Called once when the spider starts: open the output file.
        self.fp = open('./sun.txt', 'w', encoding='utf-8')

    def process_item(self, item, spider):
        # BUG FIX: the original wrote `dic + "\n"`, which raises TypeError
        # (a dict cannot be concatenated with str). Serialize the dict first.
        dic = dict(item)
        self.fp.write(str(dic) + "\n")
        return item  # pass the item on to any later pipeline

    def close_spider(self, spider):
        # Called once when the spider finishes: release the file handle.
        self.fp.close()

2.存入到json中

class JsonPipeline(object):
    """Scrapy item pipeline that writes each item as one JSON object per line (JSON Lines)."""

    def open_spider(self, spider):
        # Open the target file once, at spider start-up.
        self.fp = open("sun.json", 'w', encoding='utf-8')

    def process_item(self, item, spider):
        # ensure_ascii=False keeps non-ASCII (e.g. Chinese) text readable in the file.
        record = json.dumps(dict(item), ensure_ascii=False)
        self.fp.write(record + "\n")
        return item

    def close_spider(self, spider):
        # Flush and close the file when the spider finishes.
        self.fp.close()

3.存入到csv中

class CsvPipeline(object):
    """Scrapy item pipeline that appends each item as one row of ./sun.csv."""

    def open_spider(self, spider):
        # newline='' prevents the csv module from emitting blank rows on Windows.
        self.fp = open("./sun.csv", "w", encoding='utf-8', newline='')
        self.writer = csv.writer(self.fp)

    def process_item(self, item, spider):
        # Fixed column order; keys are assumed present on every item — TODO confirm
        # against the spider's Item definition.
        row = (item['name_size'], item['name_min'], item['name_url'],
               item['book_img'], item['book_source'], item['book_talk'],
               item['book_press'])
        self.writer.writerow(row)
        return item

    def close_spider(self, spider):
        # BUG FIX: the original never closed the file, so buffered rows could
        # be lost when the process exits.
        self.fp.close()

4.存入到mysql中

class MysqlPipeline(object):
    """Scrapy item pipeline that inserts each item as one row of the `sun` MySQL table."""

    def open_spider(self, spider):
        # BUG FIX: open_spider must accept the `spider` argument — Scrapy always
        # passes it, so the original `def open_spider(self)` raised TypeError.
        # BUG FIX: the connection keyword is `port`, not `post`.
        self.conn = pymysql.Connect(host='localhost', port=3306, user='root',
                                    password='123456', db='sun', charset='utf8')
        # One cursor reused for every item (the original re-created it per item).
        self.cursor = self.conn.cursor()

    def process_item(self, item, spider):
        # BUG FIX: in the original, the whole `% (...)` interpolation was inside
        # the string literal, so the placeholders were never filled in. Use a
        # parameterized execute() instead of string formatting — it also protects
        # against SQL injection from scraped (untrusted) data.
        sql = 'insert into sun values (%s, %s, %s, %s, %s, %s, %s)'
        params = (item["name_min"], item["name_url"], item["book_img"],
                  item["book_source"], item["book_talk"], item["book_press"],
                  item["name_size"])
        try:
            self.cursor.execute(sql, params)
            self.conn.commit()
        except Exception:
            # Keep the database consistent if a single insert fails.
            self.conn.rollback()
        return item

    def close_spider(self, spider):
        self.cursor.close()
        self.conn.close()

5.存入到MongoDB中

class MongoPipeline(object):
    """Scrapy item pipeline that stores each item in MongoDB (db `sun`, collection `sunning`)."""

    def open_spider(self, spider):
        self.client = pymongo.MongoClient(host='localhost', port=27017)
        db = self.client.sun
        self.collection = db.sunning

    def process_item(self, item, spider):
        # BUG FIX: use insert_one() — Collection.insert() is deprecated and was
        # removed from modern pymongo.
        self.collection.insert_one(dict(item))
        # BUG FIX: the original returned None, which drops the item for any
        # later pipeline configured in ITEM_PIPELINES.
        return item

    def close_spider(self, spider):
        # BUG FIX: close_spider must accept the `spider` argument Scrapy passes.
        self.client.close()

6.存入到Redis中

from scrapy.utils.project import get_project_settings
class RedisPipeline(object):
    """Scrapy item pipeline that LPUSHes each item, JSON-encoded, onto the Redis list "Sun"."""

    def open_spider(self, spider):
        # BUG FIX: open_spider must accept the `spider` argument Scrapy passes.
        settings = get_project_settings()
        # BUG FIX: the StrictRedis keyword is `port`, not `post`.
        # NOTE(review): the settings key really is spelled REDIS_POST in this
        # project's settings.py; kept as-is for compatibility — consider
        # renaming both sides to REDIS_PORT.
        self.conn = redis.StrictRedis(host=settings['REDIS_HOST'],
                                      port=settings['REDIS_POST'])

    def process_item(self, item, spider):
        # BUG FIX: redis-py cannot serialize a raw dict (raises DataError);
        # encode it as JSON before pushing.
        self.conn.lpush("Sun", json.dumps(dict(item), ensure_ascii=False))
        return item

    def close_spider(self, spider):
        # BUG FIX: the Scrapy hook is named close_spider; the original's
        # close_item was never invoked, leaking the connection pool.
        self.conn.connection_pool.disconnect()

6.1在settings中设置

REDIS_HOST='localhost'
REDIS_POST='6379'
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值