# Straight to the code.
# -*- coding: UTF-8 -*-
import pandas as pd
import configparser
import csv
from py2neo import Graph, Node, Relationship
import urllib3
# Silence urllib3's InsecureRequestWarning for the whole process.
# NOTE(review): nothing visible in this script issues HTTP requests itself;
# presumably this targets connections opened through py2neo elsewhere — confirm
# it is still needed.
urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning)
def _open_chunked_reader(txt_path, chunk_size):
    """Open ``txt_path`` as a chunked tab-separated reader that drops malformed lines."""
    common = dict(chunksize=chunk_size, encoding='utf8')
    try:
        # pandas >= 1.3: 'warn' drops each malformed line and reports it, which
        # matches the old error_bad_lines=False / warn_bad_lines=True defaults.
        return pd.read_table(txt_path, on_bad_lines='warn', **common)
    except TypeError:
        # pandas < 1.3 does not know on_bad_lines; use the legacy keyword there.
        return pd.read_table(txt_path, error_bad_lines=False, **common)


def knn(txt_path='../data/data.txt', csv_path='../data/data.csv', chunk_size=5000):
    """Convert a tab-separated text file into a comma-separated CSV file.

    The output starts with a fixed ``subject,predicate,object`` header row.
    The input is then streamed in chunks and appended below that header, so
    the whole input never has to be held in memory at once.

    Args:
        txt_path: Path of the tab-separated input file.
        csv_path: Path of the CSV file to create (overwritten if it exists).
        chunk_size: Number of rows read and written per chunk.

    Returns:
        None. The result is the file written at ``csv_path``.
    """
    # Write the column header first. newline='' is required by the csv module
    # so that no stray blank lines appear on platforms that translate newlines.
    with open(csv_path, 'w', newline='', encoding='utf-8') as csvfile:
        csv.writer(csvfile).writerow(['subject', 'predicate', 'object'])

    # NOTE(review): read_table infers column names from the first input line
    # (pandas default header='infer') and header=None below discards them, so
    # that first line never reaches the CSV. Confirm the input really starts
    # with a header row; otherwise its first record is being dropped.
    reader = _open_chunked_reader(txt_path, chunk_size)
    try:
        for chunk in reader:
            # mode='a' appends below the header; index=False omits the row index.
            chunk.to_csv(csv_path, mode='a', index=False, sep=',',
                         header=None, encoding='utf_8_sig')
    finally:
        # Release the input file handle even if writing a chunk fails.
        reader.close()
    print("Iteration is stopped.")
    # rows = 59824257 (figure noted by the original author; presumably the
    # row count of the full dataset — TODO confirm)
# Script entry point: run the conversion only when this file is executed
# directly, not when it is imported as a module.
if __name__ == "__main__":
    knn()