问题:字段中存在部分数据有多余字符=“和”,需要判断进行删除
环境安装
from sqlalchemy import create_engine
import sqlalchemy
con = sqlalchemy.create_engine("mysql+pymysql://root:root@localhost/e_commerce")
1.少量数据的修改和保存(pandas)
sql="select * from `京东结算单`"
df=pd.read_sql(sql,con=con)
df = df.fillna('')
def del_fl(x)->str:
if isinstance(x, str):
x=x.strip()
if len(x)>4 and x.startswith('="') and x.endswith('"'):
x=x[2:-2]
return x
df = df.map(del_fl)
df.to_sql('京东结算单2', con=con, if_exists='replace', index=False)
2.大量数据的修改和保存
先语句测试
INSERT INTO `结算单` (订单编号)VALUES ('="0123456789"');
UPDATE 京东结算单
SET 订单编号 =
LEFT(SUBSTRING(订单编号, 3) ,LENGTH(订单编号)-3 )
WHERE LENGTH(订单编号) >= 5 and 订单编号 LIKE '="%' and 订单编号 LIKE '%"' and 订单编号='="207874359556"' LIMIT 1;
利用python批量生成语句
import pandas as pd
sql="select * from `结算单` limit 1"
df=pd.read_sql(sql,con=con)
a=list(df.columns)
mark=[]
for i,item in enumerate(a):
x=df.loc[0,item]
if isinstance(x, str):
x=x.strip()
if len(x)>4 and x.startswith('="') and x.endswith('"'):
mark.append(item)
mark
t_name = "结算单"
cols = mark
for i in cols:
text = f"""UPDATE {t_name}
SET {i} =
LEFT(SUBSTRING({i}, 3) ,LENGTH({i})-3 )
WHERE LENGTH({i}) >= 5 and {i} LIKE '="%' and {i} LIKE '%"' ;\n"""
print(text)
问题,mysql中字段存在\n\r\t结尾,需要清除
import pymysql
# 数据库连接配置
db_config = {
'host': 'localhost',
'port': 3306,
'user': 'root',
'passwd': 'root',
'db': 'e_commerce',
'charset': 'utf8'
}
if __name__ == '__main__':
# 连接到数据库
connection = pymysql.connect(**db_config)
with connection.cursor() as cursor:
cursor.execute("show tables")
table_list = [tb[0] for tb in cursor.fetchall()]
for table in table_list:
with connection.cursor() as cursor:
# 获取需要处理的表名和列名
sql = "SELECT COLUMN_NAME FROM INFORMATION_SCHEMA.COLUMNS WHERE TABLE_SCHEMA = %s AND TABLE_NAME = %s"
cursor.execute(sql, ('e_commerce', table))
# data=cursor.fetchall()
columns = [row[0] for row in cursor.fetchall()]
# 构建UPDATE语句
update_sql = f"UPDATE `{table}` SET "
for col in columns:
# update_sql += f"{col} = TRIM(REPLACE(REPLACE({col}, '\\n', ''), '\\t', '')), "
# update_sql += f"`{col}` = TRIM(REPLACE(`{col}`, CHAR(13), ' ')), "
update_sql += f"{col} = TRIM(REPLACE(REPLACE(REPLACE({col}, CHAR(9), ' '), CHAR(10), ' '), CHAR(13), ' ')), "
# 去掉最后一个逗号
update_sql = update_sql[:-2] + ";"
print(update_sql)