python 将大文件分割成小文件

将 1000 多万行的 txt 数据按固定行数分割成多个小文件:

# Split a large text file into chunk files of at most `lines_per_file` lines.
# Each chunk is named after the nominal (exclusive) upper line bound of the
# chunk: small_file_300.txt, small_file_600.txt, ...
lines_per_file = 300
smallfile = None
with open('really_big_file.txt') as bigfile:
    try:
        for lineno, line in enumerate(bigfile):
            if lineno % lines_per_file == 0:
                # Chunk boundary: finish the previous chunk before opening
                # the next one.
                if smallfile is not None:
                    smallfile.close()
                small_filename = 'small_file_{}.txt'.format(lineno + lines_per_file)
                smallfile = open(small_filename, "w")
            smallfile.write(line)
    finally:
        # Close the last chunk even when reading or writing failed part-way
        # through; otherwise the open handle would leak.  Closing an already
        # closed file is a no-op, so this is safe on every path.
        if smallfile is not None:
            smallfile.close()

# Show every .txt file in the current directory.
import glob
# Raw string: "\*" is not a valid escape sequence and makes newer Python
# versions emit a warning at compile time.  The pattern value itself is
# unchanged (dot, backslash, "*.txt").
# NOTE(review): the ".\" prefix looks Windows-specific - confirm the target OS.
print(glob.glob(r".\*.txt"))

结合 PostgreSQL 完成批量插入:

import psycopg2
import os
import threading
class postgres:
    """Small wrapper around one psycopg2 connection for batch SQL execution.

    Intended call order: ``conn()`` -> ``run(statements)`` -> ``close()``.
    """

    def __init__(self, host="10.108.184.33", port=5432, username="vsphere",
                 password="viadmin", db="vsphere_info"):
        """Store the connection settings; no connection is opened here.

        The defaults are the values that were previously hard-coded, so
        ``postgres()`` keeps working exactly as before.

        NOTE(review): credentials committed in source are a security risk;
        consider supplying them from the environment or a config file.
        """
        self.host = host
        self.port = port
        self.username = username
        self.password = password
        self.db = db
        # Set by conn(); None means "not connected".  The attribute name
        # `curor` (sic) is kept for backward compatibility.
        self.con = None
        self.curor = None

    def conn(self):
        """Open the database connection and create the cursor used by run()."""
        self.con = psycopg2.connect(
            host=self.host,
            port=self.port,
            user=self.username,
            password=self.password,
            database=self.db,
        )
        self.curor = self.con.cursor()

    def run(self, sql):
        """Execute the given statements, then commit once.

        Args:
            sql: an iterable of SQL statements (for example the lines of a
                file), or a single statement string.  String entries that
                are empty or whitespace-only are skipped, because executing
                an empty query is an error in psycopg2.

        Raises:
            Any exception raised by ``execute`` or ``commit``.  The
            transaction is rolled back before the exception is re-raised.
        """
        if isinstance(sql, str):
            # Iterating a bare string would execute it character by character.
            sql = [sql]
        try:
            for item in sql:
                if isinstance(item, str) and not item.strip():
                    continue
                self.curor.execute(item)
            self.con.commit()
        except Exception:
            # Leave the connection in a clean state for the caller.
            self.con.rollback()
            raise

    def close(self):
        """Close the cursor and the connection if they are open.

        Safe to call when conn() was never called, and safe to call twice.
        """
        try:
            if self.curor is not None:
                self.curor.close()
        finally:
            # Close the connection even if closing the cursor failed.
            try:
                if self.con is not None:
                    self.con.close()
            finally:
                self.curor = None
                self.con = None
        
        



# lines_per_file = 300
# smallfile = None
# with open('.\\无标题.sql') as bigfile:
#     for lineno, line in enumerate(bigfile):
#         if lineno % lines_per_file == 0:
#             if smallfile:
#                 smallfile.close()
#             small_filename = 'small_file_{}.txt'.format(lineno + lines_per_file)
#             smallfile = open(small_filename, "w")
#         smallfile.write(line.replace('system_vm_daily','itt.system_vm_daily'))
#     if smallfile:
#         smallfile.close()

def test(item):
    """Execute the SQL statements stored in file *item*, then delete the file.

    Each line of the file is passed to ``postgres.run`` as one statement.
    The file is removed only after the statements ran without error, so a
    failed file stays on disk and can be retried.

    Args:
        item: path of a text file containing one SQL statement per line.

    Returns:
        None.  Failures are reported on stderr instead of being raised, so
        one bad file does not abort the other worker threads.
    """
    import sys  # local import: only needed for error reporting

    try:
        # Read everything first so the file is not held open during DB work.
        with open(item, "r") as f:
            statements = f.readlines()

        pg = postgres()
        pg.conn()
        try:
            pg.run(statements)
        finally:
            # Release the connection even when a statement fails.
            pg.close()

        print(item)
        os.remove(item)
    except Exception as exc:
        # Best-effort per file, but never silent: say which file failed and
        # why.  KeyboardInterrupt/SystemExit are deliberately not caught.
        print("failed to import {}: {!r}".format(item, exc), file=sys.stderr)

import glob

# Import every chunk file concurrently, with a bounded number of workers.
# Each call to test() opens its own database connection, so starting one
# thread per file (potentially tens of thousands of files) would open that
# many connections at once; a fixed-size pool caps it.
from concurrent.futures import ThreadPoolExecutor

MAX_WORKERS = 8  # upper bound on simultaneous DB connections

# Raw string: "\*" is not a valid escape sequence; the pattern is unchanged.
with ThreadPoolExecutor(max_workers=MAX_WORKERS) as pool:
    th = [pool.submit(test, item) for item in glob.glob(r".\*.txt")]
# Leaving the with-block waits for all submitted tasks to finish, which
# replaces the explicit join() loop.
  • 0
    点赞
  • 0
    收藏
    觉得还不错? 一键收藏
  • 3
    评论
评论 3
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值