import os
import re
import pyodbc
import logging
from datetime import datetime
# Logging configuration
logging.basicConfig(
level=logging.DEBUG,
format='%(asctime)s - %(levelname)s - %(message)s',
handlers=[
logging.FileHandler('csv_import.log'),
logging.StreamHandler()
]
)
logger = logging.getLogger(__name__)
def get_record_file_path(input_dir):
"""获取位于输入目录下的记录文件路径"""
return os.path.join(input_dir, "processed_files.log")
def load_processed_files(input_dir):
"""从输入目录加载已处理过的文件名集合"""
record_file = get_record_file_path(input_dir)
processed_files = set()
if os.path.exists(record_file):
try:
with open(record_file, 'r') as f:
for line in f:
filename = line.strip()
                    if filename:  # skip blank lines
                        processed_files.add(filename)
            logger.info(f"Loaded {len(processed_files)} processed-file records")
        except Exception as e:
            logger.error(f"Failed to load processed-file records: {str(e)}")
    else:
        logger.info(f"No processed-file record found at {record_file}; a new one will be created")
return processed_files
def record_processed_file(filename, input_dir):
"""在输入目录中记录已处理的文件名"""
record_file = get_record_file_path(input_dir)
try:
with open(record_file, 'a') as f:
f.write(f"{filename}\n")
logger.debug(f"已记录处理文件: {filename} (记录文件: {record_file})")
except Exception as e:
logger.error(f"记录已处理文件失败: {filename} - {str(e)}")
def get_db_connection():
"""创建数据库连接"""
try:
conn = pyodbc.connect(
"DRIVER={ODBC Driver 17 for SQL Server};"
"SERVER=192.168.10.116;"
"DATABASE=现场DB;"
"UID=sa;"
"PWD=gtec_6600"
)
logger.info("数据库连接成功")
return conn
except pyodbc.Error as e:
logger.error(f"数据库连接失败: {str(e)}")
raise
def parse_filename(filename):
"""解析文件名获取零件号和时间戳"""
try:
parts = filename.split('_')
if len(parts) < 2:
raise ValueError("文件名格式不正确")
partno = parts[-2]
timestamp = parts[-1].split('.')[0] # 去除扩展名
# 验证时间戳格式
if not re.match(r'^\d{14}$', timestamp):
raise ValueError(f"无效的时间戳格式: {timestamp}")
logger.debug(f"解析字段 - 工件号: {partno}, 时间戳: {timestamp}")
return {'partno': partno, 'timestamp': timestamp}
except Exception as e:
logger.error(f"解析文件名失败: {filename} - {str(e)}")
raise
def find_op_prefix(lines, start_idx):
"""向上查找最近的OP前缀"""
logger.debug(f"开始向上查找OP前缀,起始行: {start_idx}")
# 从start_idx行向上搜索,直到文件开头
for i in range(start_idx, -1, -1):
line = lines[i].strip()
logger.debug(f"检查行 {i}: {line}")
# 检查行是否包含"OP"
if "OP" in line:
# 提取OP前缀(第一个"-"之前的部分)
parts = line.split(" ")
if parts:
op_prefix = parts[0].strip()
if op_prefix:
logger.info(f"找到OP前缀: {op_prefix} (行 {i})")
return op_prefix
logger.warning("未找到有效的OP前缀")
return None
def validate_data_row(row):
"""验证数据行是否有效"""
if len(row) < 6:
return False
    # Check that the numeric fields parse as floats
    try:
        float(row[3])  # upper limit
        float(row[4])  # lower limit
return True
except ValueError:
return False
def process_csv(file_path):
"""处理CSV文件并提取起始行之前的数据"""
try:
with open(file_path, 'r', encoding='ANSI') as f:
lines = f.readlines()
logger.info(f"读取文件: {os.path.basename(file_path)}, 总行数: {len(lines)}")
# 1. 查找包含"/////////"的数据起始行
info_line = -1
for i, line in enumerate(lines):
if "/////////" in line:
info_line = i
logger.info(f"找到数据起始行: {i} - {line.strip()}")
break
if info_line == -1:
raise ValueError("未找到包含'/////////'的数据起始标记")
# 只处理起始行之前的数据(行号小于info_line)
logger.info(f"只处理起始行之前的数据(行号小于 {info_line})")
# 2. 向上查找最近的OP前缀
current_op = find_op_prefix(lines, info_line)
if not current_op:
raise ValueError("未找到有效的OP前缀")
processed_data = []
filename = os.path.basename(file_path)
file_info = parse_filename(filename)
        # 3. Process the data before the start line (line index < info_line),
        #    i.e. from the beginning of the file up to info_line - 1
        for i in range(0, info_line):
line = lines[i].strip()
            # Skip blank lines and separator lines
if not line or any(x in line for x in ["////", "===="]):
continue
            # 4. If a new OP line is encountered, update the current OP prefix
if "OP" in line and "-" in line:
parts = line.split(" ")
if parts:
new_op = parts[0].strip()
if new_op:
current_op = new_op
logger.debug(f"更新当前OP前缀为: {current_op}")
continue
            # 5. Process data rows
if line and "," in line:
parts = [p.strip() for p in line.split(",")]
if len(parts) >= 9:
                    # 6. If the first column does not start with "OP", replace it with the current OP prefix
                    if not parts[0].startswith("OP"):
                        parts[0] = f"{current_op}"
                        logger.debug(f"Applied OP prefix: {parts[0]}")
                    # 7. Keep only the needed columns (1, 2, 3, 6, 7, 9)
                    filtered_row = [
                        parts[0],  # operation number (with OP prefix)
                        parts[1],  # content
                        parts[2],  # tolerance type
                        parts[5],  # upper limit
                        parts[6],  # lower limit
                        parts[8]   # measured value
                    ]
                    # 8. Validate and keep valid rows
if validate_data_row(filtered_row):
processed_data.append(filtered_row)
else:
logger.warning(f"跳过无效数据行: {line}")
if not processed_data:
raise ValueError("未提取到有效数据")
logger.info(f"成功处理 {len(processed_data)} 行数据")
return processed_data, file_info
    except UnicodeDecodeError:
        logger.error("File encoding is neither ANSI (mbcs) nor UTF-8")
        raise ValueError("Unsupported file encoding: neither ANSI nor UTF-8")
except Exception as e:
logger.error(f"处理CSV文件失败: {os.path.basename(file_path)} - {str(e)}")
raise
def save_to_db(data, file_info):
"""保存数据到数据库"""
conn = None
try:
conn = get_db_connection()
cursor = conn.cursor()
        # Create the table if it does not exist
cursor.execute("""
IF NOT EXISTS (SELECT * FROM sysobjects WHERE name='T_IPC')
CREATE TABLE T_IPC (
line VARCHAR(50) NOT NULL,
part VARCHAR(50) NOT NULL,
part_no VARCHAR(20) NOT NULL,
time_stamp VARCHAR(20) NOT NULL,
content VARCHAR(100) NOT NULL,
tolerance_type VARCHAR(100) NOT NULL,
OP VARCHAR(20) NOT NULL,
up_limit FLOAT NOT NULL,
low_limit FLOAT NOT NULL,
data FLOAT NOT NULL,
offset FLOAT NOT NULL,
create_time DATETIME DEFAULT GETDATE(),
result VARCHAR(10)
)
""")
create_time = datetime.now()
        # Bulk-insert the data
insert_sql = """
INSERT INTO T_IPC (line, part, part_no, time_stamp, content, \
tolerance_type, OP, up_limit, low_limit, \
data, offset, create_time) \
VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?) \
"""
params = []
for row in data:
try:
                # Extract the content (strip the numbering prefix)
content = row[1]
if ')' in row[1]:
content = row[1].split(')', maxsplit=1)[-1].strip()
                # Compute the offset within the tolerance band
upper = float(row[3])
lower = float(row[4])
value = float(row[5])
                # Avoid division by zero
if (upper - lower) == 0:
offset = 0.0
else:
offset = (value - lower) / (upper - lower)
params.append((
                    '2',                                    # production line
                    '曲轴',                                  # part type
                    file_info['partno'],                    # part number
                    file_info['timestamp'],                 # timestamp
                    content,                                # content
                    row[2],                                 # tolerance type
                    row[0].replace('.', '').split('-')[0],  # operation number (OP)
                    upper,                                  # upper limit
                    lower,                                  # lower limit
                    value,                                  # measured value
                    offset,                                 # offset within the tolerance band
                    create_time                             # creation time
))
except (ValueError, IndexError, TypeError) as e:
logger.error(f"数据转换失败: {row} - {str(e)}")
if params:
cursor.executemany(insert_sql, params)
conn.commit()
logger.info(f"成功插入 {len(params)} 条记录到数据库")
else:
logger.warning("没有有效数据插入数据库")
except pyodbc.Error as e:
logger.error(f"数据库操作失败: {str(e)}")
if conn:
conn.rollback()
raise
finally:
if conn:
conn.close()
def process_files():
"""处理目录中的所有CSV文件(不剪切文件,只记录文件名)"""
input_dir = r"D:\csv\#2\曲轴"
    # Load the record of already-processed files from input_dir
processed_files = load_processed_files(input_dir)
processed_count = 0
error_count = 0
for filename in os.listdir(input_dir):
file_path = os.path.join(input_dir, filename)
        # Only handle CSV files that have not been processed yet
if filename.lower().endswith('.csv') and filename not in processed_files:
try:
logger.info(f"开始处理文件: {filename}")
data, file_info = process_csv(file_path)
save_to_db(data, file_info)
                # Record the processed file in input_dir
record_processed_file(filename, input_dir)
processed_count += 1
logger.info(f"文件处理成功: {filename}")
except Exception as e:
error_count += 1
logger.error(f"文件处理失败: {filename} - {str(e)}")
# 保留原始文件,不移动
else:
if filename.lower().endswith('.csv'):
logger.debug(f"跳过文件: {filename} (已在处理记录中)")
logger.info(f"处理完成! 成功: {processed_count}, 失败: {error_count}")
# 重新加载以获取最新计数
processed_files = load_processed_files(input_dir)
logger.info(f"当前已处理文件总数: {len(processed_files)}")
if __name__ == "__main__":
try:
process_files()
except Exception as e:
logger.critical(f"程序异常终止: {str(e)}")
Change request: do not record filenames; instead, move (cut) each file out of the directory after it has been processed.
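One possible way to make that change (a minimal sketch, not a drop-in replacement): remove load_processed_files / record_processed_file and, once save_to_db has committed, move the file out of the input directory with shutil.move. The "processed" destination subdirectory used below is an assumption; any target folder works.

# Sketch: process every CSV in the directory, then cut (move) it away instead of
# recording its name. The destination folder "processed" is a hypothetical choice.
import shutil

def process_files_and_move():
    input_dir = r"D:\csv\#2\曲轴"
    done_dir = os.path.join(input_dir, "processed")  # assumed destination folder
    os.makedirs(done_dir, exist_ok=True)
    processed_count = 0
    error_count = 0
    for filename in os.listdir(input_dir):
        file_path = os.path.join(input_dir, filename)
        if not (os.path.isfile(file_path) and filename.lower().endswith('.csv')):
            continue
        try:
            logger.info(f"Started processing file: {filename}")
            data, file_info = process_csv(file_path)
            save_to_db(data, file_info)
            # Move (cut) the file only after the database insert succeeded,
            # so failed files stay in place and are retried on the next run.
            shutil.move(file_path, os.path.join(done_dir, filename))
            processed_count += 1
            logger.info(f"File processed and moved: {filename}")
        except Exception as e:
            error_count += 1
            logger.error(f"File processing failed, file left in place: {filename} - {str(e)}")
    logger.info(f"Processing finished! Succeeded: {processed_count}, failed: {error_count}")

Moving only after a successful commit keeps the behaviour of the original script: files that fail remain in the input directory and can be retried on the next run.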