# Split a text file with 10+ million rows into smaller files by line count.
def split_file(path="really_big_file.txt", lines_per_file=300, out_dir="."):
    """Split *path* into numbered chunk files of at most *lines_per_file* lines.

    Chunks are written to *out_dir* as ``small_file_<N>.txt`` where N is the
    cumulative line count the chunk covers (300, 600, ...), matching the
    original script's naming scheme.

    :param path: source text file to split
    :param lines_per_file: maximum number of lines per chunk
    :param out_dir: directory that receives the chunk files
    """
    import os

    smallfile = None
    try:
        with open(path) as bigfile:
            for lineno, line in enumerate(bigfile):
                if lineno % lines_per_file == 0:
                    # Chunk boundary: close the previous chunk, open the next.
                    if smallfile:
                        smallfile.close()
                    small_filename = os.path.join(
                        out_dir,
                        "small_file_{}.txt".format(lineno + lines_per_file),
                    )
                    smallfile = open(small_filename, "w")
                smallfile.write(line)
    finally:
        # Guarantee the last (possibly partial) chunk is flushed and closed,
        # even if reading the source fails midway.
        if smallfile:
            smallfile.close()


if __name__ == "__main__":
    split_file()
# List every .txt file in the current directory.  os.path.join makes the
# pattern portable; the original ".\*.txt" only matched on Windows.
import glob
import os

print(glob.glob(os.path.join(".", "*.txt")))
# Bulk-insert the chunk files into PostgreSQL.
import psycopg2
import os
import threading
class postgres:
    """Thin psycopg2 wrapper used by the loader threads below.

    Typical usage: ``conn()`` once, ``run(statements)``, then ``close()``.

    NOTE(review): credentials are hard-coded; move them to environment
    variables or a config file before this runs outside a trusted network.
    """

    def __init__(self):
        # Connection settings for the target vsphere_info database.
        self.host = "10.108.184.33"
        self.port = 5432
        self.username = "vsphere"
        self.password = "viadmin"
        self.db = "vsphere_info"

    def conn(self):
        """Open the connection and a cursor; must be called before run()."""
        self.con = psycopg2.connect(
            host=self.host,
            port=self.port,
            user=self.username,
            password=self.password,
            database=self.db,
        )
        # Renamed from the original misspelled ``curor``; only used
        # internally by this class in this file.
        self.cursor = self.con.cursor()

    def run(self, sql):
        """Execute an iterable of SQL statement strings.

        Commits after every statement, so rows already inserted survive a
        failure later in the batch (matches the original loader behavior).
        """
        for statement in sql:
            self.cursor.execute(statement)
            self.con.commit()

    def close(self):
        """Release the cursor and the connection."""
        self.cursor.close()
        self.con.close()
# lines_per_file = 300
# smallfile = None
# with open('.\\无标题.sql') as bigfile:
# for lineno, line in enumerate(bigfile):
# if lineno % lines_per_file == 0:
# if smallfile:
# smallfile.close()
# small_filename = 'small_file_{}.txt'.format(lineno + lines_per_file)
# smallfile = open(small_filename, "w")
# smallfile.write(line.replace('system_vm_daily','itt.system_vm_daily'))
# if smallfile:
# smallfile.close()
def test(item):
    """Load one chunk file of SQL statements into Postgres, then delete it.

    Best-effort per file: a failure is reported and the file is left in
    place so the run can be retried.  The original bare ``except: pass``
    silently swallowed every error (including KeyboardInterrupt/SystemExit),
    making failed loads indistinguishable from successful ones.

    :param item: path to a chunk file containing one SQL statement per line
    """
    try:
        with open(item, "r") as f:
            statements = f.readlines()
        pg = postgres()
        pg.conn()
        pg.run(statements)
        pg.close()
        print(item)
        # Remove the chunk only after the whole file loaded successfully.
        os.remove(item)
    except Exception as exc:
        # Narrowed from a bare except; report instead of hiding the error.
        print("failed to load {}: {}".format(item, exc))
import glob

# Fan out one loader thread per chunk file, then wait for all of them.
# os.path.join makes the pattern portable (the original ".\*.txt" only
# matched on Windows).  NOTE(review): this spawns an unbounded number of
# threads — consider concurrent.futures.ThreadPoolExecutor for large runs.
th = []
for chunk in glob.glob(os.path.join(".", "*.txt")):
    t = threading.Thread(target=test, args=(chunk,))
    t.start()
    th.append(t)
for t in th:
    t.join()