代码需求
- 对比 csv 文件中列名为 1、2、3、4 的前 4 列,这 4 列内容完全相同的行仅保留第一行
- 将处理完的 csv 文件按规范重新命名:文件名中的"正常.txt"替换为"nor",其余的"."替换为"_"
code
# -*- coding:utf-8 -*-
# --------------------------------
# @Time : 2021/10/15 22:41
# @Author : HaoWu
# @File : rmsame.py
# --------------------------------
import os
import re
import pandas as pd
def test(path='./nor', key_columns=('1', '2', '3', '4')):
    """De-duplicate every CSV file in ``path`` and save it under a normalized name.

    Each ``*.csv`` file found directly inside ``path`` is loaded with pandas,
    rows that repeat the same values in ``key_columns`` are dropped (the first
    occurrence is kept), and the result is written back into ``path`` as
    ``<normalized stem>.csv`` without the index column.

    The stem (file name without the ``.csv`` suffix) is normalized by
    replacing the literal text ``正常.txt`` with ``nor`` and then replacing
    every remaining ``.`` with ``_``.

    Args:
        path: Directory that holds the CSV files. Defaults to ``'./nor'``.
        key_columns: Header names of the columns compared when looking for
            duplicate rows. Defaults to ``'1'``-``'4'``, which the original
            script describes as the names of the first four columns.

    Raises:
        OSError: If ``path`` cannot be listed (e.g. it does not exist).
        KeyError: If a CSV file lacks one of ``key_columns``.

    Note:
        When the normalized name differs from the source name, the source
        file is left in place next to the newly written one.
    """
    subset = list(key_columns)
    for name in os.listdir(path):
        stem, ext = os.path.splitext(name)
        source = os.path.join(path, name)
        # Skip sub-directories and anything that is not a CSV file; feeding
        # them to read_csv would either crash or produce a mangled name.
        if ext.lower() != '.csv' or not os.path.isfile(source):
            continue

        df = pd.read_csv(source)
        df = df.drop_duplicates(subset=subset, keep='first')

        # Plain string replacement: the dot in "正常.txt" must be a literal
        # dot, not the regex "any character" wildcard.
        stem = stem.replace('正常.txt', 'nor')
        stem = stem.replace('.', '_')

        df.to_csv(os.path.join(path, stem + '.csv'), index=False)
# Run the de-duplication as soon as this file is executed (or imported).
test()