python脚本将txt文本转为sql并简单校验数据

1.在脚本所在的目录下创建数据目录,比如202004,表示为今年4月的数据目录,5月则创建202005,以此类推(这个目录必须事先创建)
2.将原始的数据文件放入到这个目录(文件的编码格式为utf-8,不是的话要先使用notebook打开,然后转为utf-8)
3.运行脚本,脚本运行成功则会在202004目录下生成一个新的202004_sql目录,相应的sql文件也在这里面
4.进入202004_sql目录,连接到数据库mysql -uroot -p(回车后输入数据库密码)
5.在相应的数据库下运行这些sql文件
6.无报错表示数据导入成功

#!/usr/bin/python 
# translate txt_data to sql
#
import io
import os

# filename = input("filename:")
# print(filename)
base_dir = os.getcwd()
date_dir = input('date_dir:')
# 于当月目录下创建以%Y%m为前缀,以_sql为后缀的子目录
sql_dir = base_dir + "/" + date_dir + "/" + date_dir + '_sql'
if os.path.exists(sql_dir):
  pass
else:
  os.mkdir(base_dir + "/" + date_dir + "/" + date_dir + '_sql')
def convert_data(filename):
  sql_file = filename + ".sql"
  with io.open(filename, 'r+', encoding='utf-8') as f:
    for line in f:
      # 先把每行数据的逗号替换为&,然后再添加逗号作为分隔符
      # 过滤空行
      if line:
        line = line.replace(',', '&')
        line = line.replace('`', ',')
        L1 = line.split(',')
        # 粗略处理,字段不符合规范的数据在后面填充NULL
        if len(L1) < 58:
          x = 58 - len(L1)
          for _ in range(x):
            L1.append('NULL')
        # 空字段替换为NULL
        for i in L1:
          if i == '':
            i = "NULL"
        S1 = tuple(x for x in L1)
        sql = "insert into " + "table_" + date_dir + "(`msg_type`, `sy_fn`, `trans_date`, `SP_code`, `trans_handle_code`, `ser_condition_code`, `trans_type_zh`, `acc_trans_date`, `acc_trans_time`, `trans_amount`, `term_no`, `term_serial_no`, `mer_code`, `term_no_cups`, `mer_code_cups`, `pay_code_type`, `pay_no`, `bill_no_type`, `bill_no`, `user_area_code`, `user_att_area_code`, `card_no`, `industry_org_code`, `bank_code`, `term_org_trans_flow`, `system_org_trans_flow`, `liq_date`, `Ret_Reference_no`, `Auth_code_38`, `54_domain`, `Receive_mechanism_id`, `trans_status`, `cups_response_code`, `Acc_response_code`, `Industry_respons_code`, `trans_channel_60`, `trans_code`, `acc_reservation_122`, `multi_channel_error_code`, `add_industry_info`, `retain_add_info`, `send_mechanism_code`, `std_bus_model`, `std_enter_org_name`, `std_bus_no`, `std_bus_name`, `std_bus_auth_no`, `std_user_no_types`, `std_user_no`, `std_user_area_no`, `std_user_no_area_add_code`, `rece_institution_id_code`, `convert_org_code`, `msg_resv_fld`, `mer_code2`, `mix`, `ind_mer_code`, `mer_pay_add`) values{};\n".format(S1)
        sql_file = os.path.join(sql_dir, sql_file)
        with open(sql_file, 'a+') as e:
          e.write(sql) 
  # convert the data to sql

def check_data():
  """
  统计data行数和转换后的sql语句行数
  """
  check_dir = os.path.join(base_dir, date_dir)
  files = os.listdir(check_dir)
  os.chdir(check_dir)
  SUM = 0
  for filename in files:
    if not os.path.isdir(filename):
      with open(filename, 'r') as f:
        n = len(f.readlines())
        SUM += n
  print("source_data_lines:", SUM)
  
  sql_files = os.listdir(sql_dir)
  os.chdir(sql_dir)
  count = 0
  for file in sql_files:
    if not os.path.isdir(file):
      with open(file, 'r') as e:
        m = len(e.readlines())
        count += m
  print("sql_data_lines:", count)



if __name__ == '__main__':
  source_data_dir = os.path.join(base_dir, date_dir)
  files = os.listdir(source_data_dir)
  os.chdir(source_data_dir)
  for filename in files:
    if not os.path.isdir(filename):
      convert_data(filename)
      # pass
  check_data()  


展开阅读全文

没有更多推荐了,返回首页

©️2019 CSDN 皮肤主题: 大白 设计师: CSDN官方博客
应支付0元
点击重新获取
扫码支付

支付成功即可阅读