# Split a text file with 10+ million rows into smaller files by line count.
def split_file(path="really_big_file.txt", lines_per_file=300, out_dir="."):
    """Split *path* into numbered chunk files of at most *lines_per_file* lines.

    Chunks are written to *out_dir* as ``small_file_<N>.txt`` where N is the
    cumulative line count the chunk covers (300, 600, ...), matching the
    original script's naming scheme.

    :param path: source text file to split
    :param lines_per_file: maximum number of lines per chunk
    :param out_dir: directory that receives the chunk files
    """
    import os

    smallfile = None
    try:
        with open(path) as bigfile:
            for lineno, line in enumerate(bigfile):
                if lineno % lines_per_file == 0:
                    # Chunk boundary: close the previous chunk, open the next.
                    if smallfile:
                        smallfile.close()
                    small_filename = os.path.join(
                        out_dir,
                        "small_file_{}.txt".format(lineno + lines_per_file),
                    )
                    smallfile = open(small_filename, "w")
                smallfile.write(line)
    finally:
        # Guarantee the last (possibly partial) chunk is flushed and closed,
        # even if reading the source fails midway.
        if smallfile:
            smallfile.close()


if __name__ == "__main__":
    split_file()
# List every .txt file in the current directory.  os.path.join makes the
# pattern portable; the original ".\*.txt" only matched on Windows.
import glob
import os

print(glob.glob(os.path.join(".", "*.txt")))
# Bulk-insert the chunk files into PostgreSQL.
import psycopg2
import os
import threading
class postgres:
    """Thin psycopg2 wrapper used by the loader threads below.

    Typical usage: ``conn()`` once, ``run(statements)``, then ``close()``.

    NOTE(review): credentials are hard-coded; move them to environment
    variables or a config file before this runs outside a trusted network.
    """

    def __init__(self):
        # Connection settings for the target vsphere_info database.
        self.host = "10.108.184.33"
        self.port = 5432
        self.username = "vsphere"
        self.password = "viadmin"
        self.db = "vsphere_info"

    def conn(self):
        """Open the connection and a cursor; must be called before run()."""
        self.con = psycopg2.connect(
            host=self.host,
            port=self.port,
            user=self.username,
            password=self.password,
            database=self.db,
        )
        # Renamed from the original misspelled ``curor``; only used
        # internally by this class in this file.
        self.cursor = self.con.cursor()

    def run(self, sql):
        """Execute an iterable of SQL statement strings.

        Commits after every statement, so rows already inserted survive a
        failure later in the batch (matches the original loader behavior).
        """
        for statement in sql:
            self.cursor.execute(statement)
            self.con.commit()

    def close(self):
        """Release the cursor and the connection."""
        self.cursor.close()
        self.con.close()
# lines_per_file = 300
# smallfile = None
# with open('.\\无标题.sql') as bigfile:
# for lineno, line in enumerate(bigfile):
# if lineno % lines_per_file == 0:
# if smallfile:
# smallfile.close()
# small_filename = 'small_file_{}.txt'.format(lineno + lines_per_file)
# smallfile = open(small_filename, "w")
# smallfile.write(line.replace('system_vm_daily','itt.system_vm_daily'))
# if smallfile:
# smallfile.close()
def test(item):
    """Load one chunk file of SQL statements into Postgres, then delete it.

    Best-effort per file: a failure is reported and the file is left in
    place so the run can be retried.  The original bare ``except: pass``
    silently swallowed every error (including KeyboardInterrupt/SystemExit),
    making failed loads indistinguishable from successful ones.

    :param item: path to a chunk file containing one SQL statement per line
    """
    try:
        with open(item, "r") as f:
            statements = f.readlines()
        pg = postgres()
        pg.conn()
        pg.run(statements)
        pg.close()
        print(item)
        # Remove the chunk only after the whole file loaded successfully.
        os.remove(item)
    except Exception as exc:
        # Narrowed from a bare except; report instead of hiding the error.
        print("failed to load {}: {}".format(item, exc))
import glob

# Fan out one loader thread per chunk file, then wait for all of them.
# os.path.join makes the pattern portable (the original ".\*.txt" only
# matched on Windows).  NOTE(review): this spawns an unbounded number of
# threads — consider concurrent.futures.ThreadPoolExecutor for large runs.
th = []
for chunk in glob.glob(os.path.join(".", "*.txt")):
    t = threading.Thread(target=test, args=(chunk,))
    t.start()
    th.append(t)
for t in th:
    t.join()