import pandas as pd
import sqlite3
import os
import json
from tqdm import tqdm
col_name_file = r"E:\code1\press_column_names.txt"
data_dir = r"E:\code1\Defect2.0"
# 读取列名
col_names = []
with open(col_name_file, 'r') as f:
# Read the content of the file line by line
for line in f.readlines():
# Split the line into words
col_names.append(line.strip())
print(col_names)
# 建立数据库连接
conn = sqlite3.connect('data3.db')
t = []
i = 0
for root, dirs, files in os.walk(data_dir):
for file in tqdm(files, ncols=80):
if not file.endswith(".json"):
continue
with open(os.path.join(root, file), "r", encoding="utf-8") as f:
jsonData = json.load(f)
defectDir = os.path.dirname(root).split(os.path.sep)[-1]
for item in jsonData:
defectName_1 = item.get('defectName-1', "")
defectName_2 = item.get('defectName-2', "")
defectName_3 = item.get('defectName-3', "")
if "异物" in item.get('problemCause-1', "") or "异物" in item.get('problemCause-2', ""):
continue
if "锡" not in defectName_1 and "锡" not in defectName_2 and defectName_1 != "":
continue
row = []
for col_name in col_names:
row.append(item.get(col_name, ""))
row.append(defectDir)
t.append(row)
i += 1
if len(t) == 500000:
df = pd.DataFrame(t, columns=col_names + ["defectDir"])
df.to_sql("data_defect2", conn, if_exists="append")
t = []
df = pd.DataFrame(t, columns=col_names + ["defectDir"])
df.to_sql("data_defect2", conn, if_exists="append")
t = []
conn.close()