# 1. 移动指定的文件到指定的目标文件夹
import os
import shutil
def move_xml_files(source_folder, destination_folder):
    """Move every ``.xml`` file found under ``source_folder`` into ``destination_folder``.

    The source tree is walked recursively and matching files are moved flat
    into the destination; the sub-folder layout is not reproduced.

    Args:
        source_folder: Root directory to search.
        destination_folder: Directory that receives the files. Created if it
            does not exist. It may live inside ``source_folder``; files that
            are already in it are left untouched.

    Returns:
        None. A notice is printed and nothing is moved when
        ``source_folder`` does not exist.

    Raises:
        shutil.Error: If a file with the same name already exists in
            ``destination_folder`` (nothing is overwritten silently).
    """
    if not os.path.exists(source_folder):
        print("源文件夹不存在")
        return

    os.makedirs(destination_folder, exist_ok=True)
    destination_real = os.path.realpath(destination_folder)

    for root, dirs, files in os.walk(source_folder):
        # Prune the destination from the walk: visiting it would try to move
        # files onto themselves, which shutil.move rejects with shutil.Error.
        dirs[:] = [
            name for name in dirs
            if os.path.realpath(os.path.join(root, name)) != destination_real
        ]
        # Same reason when source and destination are the same directory:
        # skip its own files but keep descending into sub-folders.
        if os.path.realpath(root) == destination_real:
            continue

        for file in files:
            file_path = os.path.join(root, file)
            if os.path.isfile(file_path) and file.endswith(".xml"):
                shutil.move(file_path, destination_folder)

    print("移动完成")
# Paths of the source folder and the destination folder.
source_folder, destination_folder = "", ""

# Run the move with the paths configured above.
move_xml_files(
    source_folder=source_folder,
    destination_folder=destination_folder,
)
import os
import glob
import shutil
def move_files_with_same_extension(source_folder, target_folder, extension):
    """Move files with a given extension from ``source_folder`` into ``target_folder``.

    Only entries directly inside ``source_folder`` are considered; the search
    does not recurse into sub-folders.

    Args:
        source_folder: Directory to search. Treated as a literal path, so
            glob metacharacters in it (``[``, ``*``, ``?``) are escaped.
        target_folder: Directory that receives the files. Created when there
            is at least one file to move.
        extension: Extension to match, with or without the leading dot
            (``"jpg"`` and ``".jpg"`` are equivalent).

    Returns:
        None.
    """
    suffix = extension.lstrip(".")
    pattern = os.path.join(glob.escape(source_folder), f"*.{suffix}")
    files = glob.glob(pattern)

    # The target must be an existing directory before the first move:
    # otherwise shutil.move renames each file *to* the target path and every
    # later file overwrites the previous one.
    if files:
        os.makedirs(target_folder, exist_ok=True)

    for file in files:
        shutil.move(file, target_folder)
        print(f"已移动文件到目标文件夹:{file}")

    print("文件移动完成。")
# Folder the files are taken from; replace with the real source path.
source_folder = "/source/folder"

# Folder the files are moved into; replace with the real target path.
target_folder = "/target/folder"

# Extension of the files to move; replace with the one you need.
extension = "jpg"

move_files_with_same_extension(
    source_folder=source_folder,
    target_folder=target_folder,
    extension=extension,
)
# 2. 移动指定文件名字的文件
# 一、正则的方法
import os
import re
import shutil
source_folder = r''  # source folder path
destination_folder = r''  # destination folder path

# Captures (non-greedily) the text inside the first "(...)" of a file name.
first_bracket_pattern = re.compile(r'\((.*?)\)')

# Walk every file below the source folder.
for root, dirs, files in os.walk(source_folder):
    for file in files:
        match = first_bracket_pattern.search(file)
        # Only files whose first bracketed part is exactly "-1,-1" are moved.
        if match is None or match.group(1) != '-1,-1':
            continue

        # Full paths of the file and of its new location.
        source_file = os.path.join(root, file)
        destination_file = os.path.join(destination_folder, file)

        shutil.move(source_file, destination_file)
        print(f"Moved file: {source_file} to {destination_file}")
# 二、split的方法
import os
import re
import shutil
source_folder = ''  # source folder path
destination_folder = ''  # destination folder path


def extract_first_bracket_content(file_name):
    """Return the text between the first "(" and the next ")" in ``file_name``.

    Args:
        file_name: File name to inspect.

    Returns:
        The bracketed text, or ``None`` when the name contains no "(".
        When the "(" is never closed, everything after it is returned.
    """
    _, opening, remainder = file_name.partition('(')
    if not opening:
        return None
    return remainder.partition(')')[0]


# Walk every file below the source folder.
for root, dirs, files in os.walk(source_folder):
    for file in files:
        # Names without "(" yield None and are skipped instead of raising
        # IndexError and aborting the whole run.
        first_bracket_content = extract_first_bracket_content(file)
        if first_bracket_content != '-1,-1':
            continue

        source_file = os.path.join(root, file)
        destination_file = os.path.join(destination_folder, file)

        # Move the file into the destination folder.
        shutil.move(source_file, destination_file)
        print(f"Moved file: {source_file} to {destination_file}")
# 3. 文件名字匹配检查
import os
import glob
import shutil
def check_matching_files(xml_folder, image_folder, target_folder):
    """Move XML and BMP files that have no same-named counterpart into ``target_folder``.

    An ``.xml`` file in ``xml_folder`` and a ``.bmp`` file in ``image_folder``
    match when their base names (without extension) are equal. Every file
    without a match is moved to ``target_folder``.

    Args:
        xml_folder: Directory searched for ``*.xml`` files (top level only).
        image_folder: Directory searched for ``*.bmp`` files (top level only).
        target_folder: Directory that receives unmatched files. Created when
            there is at least one file to move.

    Returns:
        None.
    """
    def stem(path):
        # Base name without directory and extension.
        return os.path.splitext(os.path.basename(path))[0]

    xml_files = glob.glob(os.path.join(xml_folder, "*.xml"))
    image_files = glob.glob(os.path.join(image_folder, "*.bmp"))

    # Sets give O(1) membership tests instead of scanning a list per file.
    xml_stems = {stem(path) for path in xml_files}
    image_stems = {stem(path) for path in image_files}

    unmatched_xml_files = [f for f in xml_files if stem(f) not in image_stems]
    unmatched_image_files = [f for f in image_files if stem(f) not in xml_stems]

    # The target must be an existing directory before the first move:
    # otherwise shutil.move renames each file *to* the target path and every
    # later file overwrites the previous one.
    if unmatched_xml_files or unmatched_image_files:
        os.makedirs(target_folder, exist_ok=True)

    for file in unmatched_xml_files:
        shutil.move(file, target_folder)
        print(f"已移动XML文件到目标文件夹:{file}")

    for file in unmatched_image_files:
        shutil.move(file, target_folder)
        print(f"已移动图片文件到目标文件夹:{file}")

    print("文件匹配检查完成。")
# Folder holding the XML files; replace with the real path.
xml_folder = ''

# Folder holding the image files; replace with the real path.
image_folder = ''

# Folder that unmatched files are moved into; replace with the real path.
target_folder = ''

check_matching_files(
    xml_folder=xml_folder,
    image_folder=image_folder,
    target_folder=target_folder,
)
# 4. 删除xml文件中没有object的文件及对应的图片
import os
import glob
import xml.etree.ElementTree as ET
def delete_file_and_image_without_object(xml_file, image_folder):
    """Delete an XML file that has no ``object`` child, along with its ``.jpg`` image.

    Args:
        xml_file: Path of the XML file to inspect.
        image_folder: Directory searched for the image whose base name equals
            that of ``xml_file`` with a ``.jpg`` extension.

    Returns:
        None. Files whose root element has at least one direct ``object``
        child are left untouched.
    """
    annotation = ET.parse(xml_file).getroot()

    # Nothing to do when the root has at least one direct <object> child.
    if annotation.findall('object'):
        return

    os.remove(xml_file)

    # The matching image shares the XML file's base name.
    base_name, _ = os.path.splitext(os.path.basename(xml_file))
    image_file = os.path.join(image_folder, base_name + '.jpg')
    if os.path.exists(image_file):
        os.remove(image_file)

    print(f"已删除文件:{xml_file} 和 {image_file}")
# Directory holding the XML files; replace with the real path.
xml_folder = "/xml/files"

# Directory holding the image files; replace with the real path.
image_folder = "/image/files"

# Check every XML file found directly inside xml_folder.
xml_files = glob.glob(os.path.join(xml_folder, "*.xml"))
for xml_file in xml_files:
    delete_file_and_image_without_object(xml_file=xml_file, image_folder=image_folder)
# 5. 删除文件名字中带有tra的文件
import os
def delete_files_with_tra(image_folder, xml_folder):
    """Delete ``.jpg`` and ``.xml`` files whose names contain ``"tra"``.

    Args:
        image_folder: Directory whose ``.jpg`` files are checked.
        xml_folder: Directory whose ``.xml`` files are checked.

    Returns:
        None. One line is printed per deleted file.
    """
    # Both folders are listed before anything is deleted.
    image_files = [
        entry for entry in os.listdir(image_folder)
        if entry.endswith(".jpg") and "tra" in entry
    ]
    xml_files = [
        entry for entry in os.listdir(xml_folder)
        if entry.endswith(".xml") and "tra" in entry
    ]

    # Images go first, then the XML files.
    for image_file in image_files:
        os.remove(os.path.join(image_folder, image_file))
        print(f"已删除图片文件:{image_file}")

    for xml_file in xml_files:
        os.remove(os.path.join(xml_folder, xml_file))
        print(f"已删除 XML 文件:{xml_file}")
# Path of the image folder.
image_folder = "/image/folder"

# Path of the XML folder.
xml_folder = "/xml/folder"

delete_files_with_tra(image_folder=image_folder, xml_folder=xml_folder)
# 6. 对解析出错的xml文件进行处理
import os
import glob
import xml.etree.ElementTree as ET
import shutil
def process_xml_file(xml_file, xml_folder, image_folder, target_folder):
    """Parse one XML file and set it aside via ``move_files`` if it cannot be parsed.

    Args:
        xml_file: Path of the XML file to parse.
        xml_folder: Passed through to ``move_files``.
        image_folder: Passed through to ``move_files``.
        target_folder: Passed through to ``move_files``.

    Returns:
        None.
    """
    try:
        tree = ET.parse(xml_file)
    except ET.ParseError as e:
        print(f"解析XML文件出错:{xml_file}\n{str(e)}")
        move_files(xml_file, xml_folder, image_folder, target_folder)
    else:
        root = tree.getroot()
        # Hook for any further processing of the parsed document.
        print(f"已处理文件:{xml_file}")
def move_files(xml_file, xml_folder, image_folder, target_folder):
    """Move an XML file and its same-named ``.jpg`` image into ``target_folder``.

    Args:
        xml_file: Path of the XML file to move.
        xml_folder: Not used by this function; kept so existing callers keep
            working.
        image_folder: Directory searched for the image whose base name equals
            that of ``xml_file`` with a ``.jpg`` extension.
        target_folder: Directory that receives the files. Created if missing.

    Returns:
        None.
    """
    # Without this, the move fails with FileNotFoundError when the target
    # folder does not exist yet. An empty path means the current directory,
    # which needs no creation (and os.makedirs('') would raise).
    if target_folder:
        os.makedirs(target_folder, exist_ok=True)

    # Move the XML file.
    xml_filename = os.path.basename(xml_file)
    shutil.move(xml_file, os.path.join(target_folder, xml_filename))

    # Move the matching image, if there is one.
    image_filename = os.path.splitext(xml_filename)[0] + '.jpg'
    image_file = os.path.join(image_folder, image_filename)
    if os.path.exists(image_file):
        shutil.move(image_file, os.path.join(target_folder, image_filename))

    print(f"已移动文件:{xml_file} 和 {image_file}")
# Folder holding the XML files; replace with the real path.
xml_folder = "/xml/files"

# Folder holding the image files; replace with the real path.
image_folder = "/image/files"

# Folder that files are moved into; replace with the real path.
target_folder = "/target/folder"

# Process every XML file found directly inside xml_folder.
xml_files = glob.glob(os.path.join(xml_folder, "*.xml"))
for xml_file in xml_files:
    process_xml_file(
        xml_file=xml_file,
        xml_folder=xml_folder,
        image_folder=image_folder,
        target_folder=target_folder,
    )
# 7. 删除xml文件中的乱码文字
import os
import glob
import xml.etree.ElementTree as ET
# Whitespace that XML formatting relies on; never treated as garbage.
_XML_WHITESPACE = "\t\n\r"


def _is_invalid_character(char):
    """Return True if ``char`` should be stripped from XML content.

    A character is invalid when it is U+FFFD (the replacement character left
    behind by a failed decode) or when ``str.isprintable`` rejects it, except
    for tab, newline and carriage return.
    """
    if char in _XML_WHITESPACE:
        return False
    return char == "\ufffd" or not char.isprintable()


def _strip_invalid_characters(text):
    """Return ``text`` without the characters ``_is_invalid_character`` rejects."""
    return ''.join(char for char in text if not _is_invalid_character(char))


def contains_invalid_characters(text):
    """Return True if ``text`` contains at least one garbage character.

    Round-tripping a ``str`` through UTF-8 cannot raise ``UnicodeDecodeError``,
    so that check could never report anything; the test is done per
    character instead.

    Args:
        text: String to inspect.

    Returns:
        bool: True when any character is U+FFFD or non-printable (tab,
        newline and carriage return excluded).
    """
    return any(_is_invalid_character(char) for char in text)


def remove_invalid_characters(xml_file):
    """Strip garbage characters from an XML file in place.

    Every element's text, tail and attribute values are cleaned. The file is
    rewritten (UTF-8) only when something was actually removed; the document
    structure is left unchanged.

    Args:
        xml_file: Path of the XML file to clean.

    Returns:
        None.

    Raises:
        xml.etree.ElementTree.ParseError: If the file is not well-formed XML.
    """
    tree = ET.parse(xml_file)
    changed = False

    for element in tree.iter():
        if element.text and contains_invalid_characters(element.text):
            element.text = _strip_invalid_characters(element.text)
            changed = True
        if element.tail and contains_invalid_characters(element.tail):
            element.tail = _strip_invalid_characters(element.tail)
            changed = True
        # Snapshot the items so values can be reassigned while looping.
        for key, value in list(element.attrib.items()):
            if contains_invalid_characters(value):
                element.attrib[key] = _strip_invalid_characters(value)
                changed = True

    if changed:
        # Save the cleaned document.
        tree.write(xml_file, encoding='utf-8')
        print(f"已删除乱码文字:{xml_file}")
# Collect every XML file in the directory; replace the path with your own.
xml_files = glob.glob("/xml/files/*.xml")

# Clean the files one by one.
for xml_file in xml_files:
    remove_invalid_characters(xml_file=xml_file)