存储数据库Redis-行政区划代码-5级12位-最终版

最新推荐文章于 2023-05-15 11:10:39 发布

CY3761

最新推荐文章于 2023-05-15 11:10:39 发布

阅读量428

点赞数

分类专栏： python爬虫　文章标签： python 数据库 redis

本文链接：https://blog.csdn.net/weixin_63272654/article/details/121561880

版权

python爬虫专栏收录该内容

23 篇文章 0 订阅

订阅专栏

import json
import os
import time
import redis

# Line terminator removed from every line read from the data file.
joinStr = '\n'
# Separator between the fields of one line in the data file.
fileSep = ','
# Text encoding used when opening the data file.
fileDataEncoding = 'utf-8'


def timePlus():
    """Return the current local time as ``'HH:MM:SS f.ffff'``.

    The first part is the wall-clock time; the second part is the
    sub-second fraction of the same timestamp, formatted with four
    decimal places.
    """
    now = time.time()
    clock = time.strftime('%H:%M:%S', time.localtime(now))
    fraction = now - int(now)

    return '%s %.4f' % (clock, fraction)


def getFileData(filePath):
    """Read the delimited data file and return its records as JSON strings.

    Every non-blank line is split on ``fileSep`` and must provide at least
    three fields, in this order: name, parent code, current code.

    Args:
        filePath: Path of the text file to read; it is decoded with
            ``fileDataEncoding``.

    Returns:
        A generator yielding one JSON object string per record with the keys
        ``code``, ``pcode`` and ``name``, or ``None`` when ``filePath`` does
        not exist.

    Raises:
        IndexError: While iterating the generator, if a non-blank line holds
            fewer than three fields.
    """
    if not os.path.exists(filePath):
        return None

    # The whole file is read up front so it can be closed before the
    # generator is consumed.
    with open(filePath, 'r', encoding=fileDataEncoding) as r:
        fileData = r.readlines()

    def newFileData():
        for line in fileData:
            # Strip the terminator BEFORE testing for emptiness: a raw line
            # always contains at least its newline, so testing the raw line
            # never filters out blank lines.
            line = line.replace(joinStr, '')
            if not line.strip():
                continue

            fields = line.split(fileSep)

            name = fields[0]   # name
            pcode = fields[1]  # parent code
            code = fields[2]   # current code

            yield json.dumps({
                'code': code,
                'pcode': pcode,
                'name': name
            })

    return newFileData()


# Path of the input data file that main() checks for and loads.
dataFilePath = r'行政区划代码-2020.txt'


def main():
    """Load the data file into a Redis list, then read the list back.

    Steps:
        1. Verify that ``dataFilePath`` exists.
        2. Obtain the records (JSON strings) from ``getFileData``.
        3. Append each record to the Redis list ``items`` on 127.0.0.1:6379,
           printing a progress line per record.
        4. Read the whole list back and print every record.

    Raises:
        Exception: If the data file does not exist.

    NOTE(review): records are appended with RPUSH and the key is never
    cleared, so running the script twice stores every record twice —
    confirm whether that is intended.
    """
    print('开始执行 当前时间: %s' % timePlus())

    if not os.path.exists(dataFilePath):
        raise Exception('数据文件不存在')

    print('数据读取开始 当前时间: %s' % timePlus())

    items = getFileData(dataFilePath)

    print('数据读取完成 当前时间: %s' % timePlus())

    print(items)

    r = redis.Redis(host='127.0.0.1', port=6379)

    print('数据存储开始 当前时间: %s' % timePlus())

    key = 'items'

    for k, item in enumerate(items):
        # RPUSH returns the list length after the push, so no extra LLEN
        # round trip is needed for the progress display.
        length = r.rpush(key, item)
        print('\r%s 数据存储 %s 当前时间: %s' % (str(k).zfill(6), str(length).zfill(6), timePlus()), end='')

    # Terminate the '\r' progress line so the next message starts on its own line.
    print()
    print('数据存储结束 当前时间: %s' % timePlus())

    # Read back
    for raw in r.lrange(key, 0, -1):
        # json.loads accepts the raw bytes and resolves \uXXXX escapes itself;
        # pre-decoding with 'unicode_escape' would corrupt escaped quotes
        # and backslashes inside values.
        record = json.loads(raw)

        print('\rcode: %s, pcode: %s, name: %s 当前时间: %s ' % (record.get('code'), record.get('pcode'), record.get('name'), timePlus()), end='')

    # Terminate the final '\r' progress line.
    print()


if __name__ == '__main__':
    # Script entry point: run main() and report an ordinary failure as a
    # one-line message instead of a traceback.
    try:
        main()
    except Exception as e:
        # Only Exception is caught: KeyboardInterrupt and SystemExit derive
        # from BaseException and must propagate so Ctrl-C and explicit exits
        # are not silently swallowed.
        print(e)

在这里插入图片描述

这部分环节算完了：三种文件保存、三种数据库保存。MySQL 和 Redis 都是循环逐条插入，感觉 MySQL 插入慢很多；Redis 还没找到批量一次性插入的方法，而 MongoDB 和 MySQL 都有。后续可能再加个 MySQL 的批量插入，看看速度如何。

CY3761

关注

0
点赞
踩
0

收藏

觉得还不错? 一键收藏
打赏
0
评论
存储数据库Redis-行政区划代码-5级12位-最终版

import json; import os; import time; import redis; joinStr = '\n'; fileSep = ','; fileDataEncoding = 'utf-8'; def timePlus(): t = time.time(); return '%s %.4f' % (time.strftime('%H:%M:%S', time.localtime(t)), t - int(t)); def getFileData(filePath) …
复制链接

扫一扫