data_process

1.移动指定的文件到指定的目标文件夹

import os
import shutil

def move_xml_files(source_folder, destination_folder):
    """Move every ``.xml`` file found under *source_folder* into *destination_folder*.

    The source tree is walked recursively and matching files are moved flat
    into the destination; the sub-folder structure is not preserved. The
    destination folder is created when it does not exist yet.

    Args:
        source_folder: Root folder to search. When it does not exist a
            message is printed and nothing else happens.
        destination_folder: Folder that receives the files.

    Notes:
        * A destination located inside the source tree is excluded from the
          walk, so files that were just moved are never visited again.
        * A file whose name is already taken in the destination is left where
          it is and reported, instead of aborting the whole run with
          ``shutil.Error``.
    """
    if not os.path.exists(source_folder):
        print("源文件夹不存在")
        return
    os.makedirs(destination_folder, exist_ok=True)
    destination_abs = os.path.abspath(destination_folder)

    for root, dirs, files in os.walk(source_folder):
        # Editing ``dirs`` in place tells os.walk (top-down mode) not to
        # descend into the destination folder.
        dirs[:] = [
            d for d in dirs
            if os.path.abspath(os.path.join(root, d)) != destination_abs
        ]
        if os.path.abspath(root) == destination_abs:
            # Source and destination are the same folder: the files directly
            # inside it are already where they belong.
            continue

        for file in files:
            if not file.endswith(".xml"):
                continue
            file_path = os.path.join(root, file)
            if not os.path.isfile(file_path):
                continue
            target_path = os.path.join(destination_folder, file)
            if os.path.exists(target_path):
                # Never overwrite: two sub-folders may hold different files
                # that share a name.
                print(f"目标文件夹中已存在同名文件,跳过:{file_path}")
                continue
            shutil.move(file_path, target_path)

    print("移动完成")

# Placeholder paths: set the folder to scan and the folder that receives the XML files.
source_folder, destination_folder = "", ""
# Run the move with the paths configured above.
move_xml_files(source_folder=source_folder, destination_folder=destination_folder)

import os
import glob
import shutil
def move_files_with_same_extension(source_folder, target_folder, extension):
    """Move the files in *source_folder* that carry a given extension.

    Only the top level of *source_folder* is searched; sub-folders are not
    visited.

    Args:
        source_folder: Folder to search. Its name is matched literally, even
            when it contains glob metacharacters such as ``[``.
        target_folder: Folder that receives the files. It is created when
            there is at least one file to move.
        extension: Extension to match, with or without the leading dot
            (``"jpg"`` and ``".jpg"`` are equivalent).
    """
    suffix = extension.lstrip(".")
    files = glob.glob(os.path.join(glob.escape(source_folder), f"*.{suffix}"))

    if files:
        # If the target is not an existing directory, shutil.move renames each
        # file to the path ``target_folder`` itself, so every move would
        # overwrite the previous one.
        os.makedirs(target_folder, exist_ok=True)

    for file in files:
        shutil.move(file, target_folder)
        print(f"已移动文件到目标文件夹:{file}")
    print("文件移动完成。")
# Replace the three placeholders below with real values before running.
source_folder = "/source/folder"  # folder the files are taken from
target_folder = "/target/folder"  # folder the files are moved into
extension = "jpg"  # extension of the files to move
move_files_with_same_extension(
    source_folder=source_folder,
    target_folder=target_folder,
    extension=extension,
)

2.移动指定文件名字的文件

一、正则的方法

import os
import re
import shutil

source_folder = r''  # source folder path
destination_folder = r''  # destination folder path

# Walk the whole source tree and move every file whose first "(...)" group in
# its name is exactly '-1,-1'.
for root, dirs, files in os.walk(source_folder):
    for file in files:
        # Non-greedy match: captures the content of the first pair of brackets.
        match = re.search(r'\((.*?)\)', file)
        if match is None or match.group(1) != '-1,-1':
            continue

        # Full paths of the file and of its new location.
        source_file = os.path.join(root, file)
        destination_file = os.path.join(destination_folder, file)

        shutil.move(source_file, destination_file)
        print(f"Moved file: {source_file} to {destination_file}")

二、split的方法

import os
import re
import shutil
def extract_first_bracket(filename):
    """Return the text inside the first ``(...)`` of *filename*.

    Returns ``None`` when the name contains no ``(`` at all, so callers can
    skip such files instead of failing with ``IndexError``. When a ``(`` has
    no closing ``)``, everything after the ``(`` is returned.
    """
    _, opening, remainder = filename.partition('(')
    if not opening:
        return None
    return remainder.partition(')')[0]


source_folder = ''  # source folder path
destination_folder = ''  # destination folder path

# Walk every file under the source folder.
for root, dirs, files in os.walk(source_folder):
    for file in files:
        first_bracket_content = extract_first_bracket(file)
        # Names without brackets yield None and are simply skipped.
        if first_bracket_content == '-1,-1':
            source_file = os.path.join(root, file)
            destination_file = os.path.join(destination_folder, file)

            # Move the file into the destination folder.
            shutil.move(source_file, destination_file)
            print(f"Moved file: {source_file} to {destination_file}")

3.文件名字匹配检查

import os
import glob
import shutil

def check_matching_files(xml_folder, image_folder, target_folder):
    """Move XML and BMP files that lack a same-named counterpart.

    A ``*.xml`` file in *xml_folder* and a ``*.bmp`` file in *image_folder*
    are considered a pair when their base names (without extension) are
    equal. Every file without a partner is moved into *target_folder*.

    Args:
        xml_folder: Folder holding the ``.xml`` files (top level only).
        image_folder: Folder holding the ``.bmp`` files (top level only).
        target_folder: Folder that receives the unmatched files. It is
            created when there is at least one file to move.
    """
    def stem(path):
        # Base name without directory and extension, used as the pairing key.
        return os.path.splitext(os.path.basename(path))[0]

    # glob.escape keeps folder names containing "[", "*" or "?" literal.
    xml_files = glob.glob(os.path.join(glob.escape(xml_folder), "*.xml"))
    image_files = glob.glob(os.path.join(glob.escape(image_folder), "*.bmp"))

    # Sets give constant-time membership tests for the comparisons below.
    xml_stems = {stem(path) for path in xml_files}
    image_stems = {stem(path) for path in image_files}

    unmatched_xml_files = [path for path in xml_files if stem(path) not in image_stems]
    unmatched_image_files = [path for path in image_files if stem(path) not in xml_stems]

    if unmatched_xml_files or unmatched_image_files:
        # If the target is not an existing directory, shutil.move renames each
        # file to the path ``target_folder`` itself, so every move would
        # overwrite the previous one.
        os.makedirs(target_folder, exist_ok=True)

    for file in unmatched_xml_files:
        shutil.move(file, target_folder)
        print(f"已移动XML文件到目标文件夹:{file}")

    for file in unmatched_image_files:
        shutil.move(file, target_folder)
        print(f"已移动图片文件到目标文件夹:{file}")

    print("文件匹配检查完成。")

# Placeholders: replace each empty string with a real folder path.
xml_folder = ''  # folder that holds the XML files
image_folder = ''  # folder that holds the image files
target_folder = ''  # folder that receives the unmatched files

check_matching_files(
    xml_folder=xml_folder,
    image_folder=image_folder,
    target_folder=target_folder,
)

4.删除不含object的xml文件及其对应的图片

import os
import glob
import xml.etree.ElementTree as ET
def delete_file_and_image_without_object(xml_file, image_folder, image_extension='.jpg'):
    """Delete an XML file that has no ``<object>`` entry, plus its image.

    Only ``<object>`` elements that are direct children of the XML root are
    counted. When none is found the XML file is removed, together with the
    image in *image_folder* that has the same base name.

    Args:
        xml_file: Path of the XML file to inspect.
        image_folder: Folder searched for the matching image.
        image_extension: Extension (including the dot) of the image file.
            Defaults to ``'.jpg'``.

    Raises:
        xml.etree.ElementTree.ParseError: If *xml_file* is not well-formed.
    """
    root = ET.parse(xml_file).getroot()
    if root.findall('object'):
        return

    os.remove(xml_file)

    image_name = os.path.splitext(os.path.basename(xml_file))[0] + image_extension
    image_file = os.path.join(image_folder, image_name)
    if os.path.exists(image_file):
        os.remove(image_file)
        print(f"已删除文件:{xml_file} 和 {image_file}")
    else:
        # Report only what was really deleted; no image was found here.
        print(f"已删除文件:{xml_file}(未找到对应图片:{image_file})")
# Placeholder locations: replace both with the real directories.
xml_folder = "/xml/files"  # directory that holds the XML files
image_folder = "/image/files"  # directory that holds the image files
# Check every XML file found at the top level of xml_folder.
xml_files = glob.glob(os.path.join(xml_folder, "*.xml"))
for xml_file in xml_files:
    delete_file_and_image_without_object(xml_file, image_folder=image_folder)

5.删除文件名字中带有tra的文件

import os

def delete_files_with_tra(image_folder, xml_folder):
    """Delete the files whose name contains ``tra``.

    Removes every ``.jpg`` entry of *image_folder* and every ``.xml`` entry of
    *xml_folder* whose file name contains the substring ``tra``, printing one
    line per deleted file. Both folders are listed before anything is deleted.

    Args:
        image_folder: Folder that holds the ``.jpg`` files.
        xml_folder: Folder that holds the ``.xml`` files.
    """
    def tagged_names(folder, suffix):
        # Entries of ``folder`` that contain "tra" and end with ``suffix``.
        return [
            name for name in os.listdir(folder)
            if "tra" in name and name.endswith(suffix)
        ]

    doomed_images = tagged_names(image_folder, ".jpg")
    doomed_xmls = tagged_names(xml_folder, ".xml")

    for image_file in doomed_images:
        os.remove(os.path.join(image_folder, image_file))
        print(f"已删除图片文件:{image_file}")

    for xml_file in doomed_xmls:
        os.remove(os.path.join(xml_folder, xml_file))
        print(f"已删除 XML 文件:{xml_file}")

# Placeholder folders: point these at the real image and XML directories.
image_folder, xml_folder = "/image/folder", "/xml/folder"
delete_files_with_tra(image_folder=image_folder, xml_folder=xml_folder)

6.对解析出错的xml文件进行处理

import os
import glob
import xml.etree.ElementTree as ET
import shutil


def process_xml_file(xml_file, xml_folder, image_folder, target_folder):
    """Parse one XML file and set it aside when it is not well-formed.

    A file that parses is only reported as processed. When parsing raises
    ``ET.ParseError`` the error is printed and ``move_files`` is called with
    the same four arguments.

    Args:
        xml_file: Path of the XML file to parse.
        xml_folder: Passed through to ``move_files``.
        image_folder: Passed through to ``move_files``.
        target_folder: Passed through to ``move_files``.
    """
    try:
        root = ET.parse(xml_file).getroot()
    except ET.ParseError as e:
        print(f"解析XML文件出错:{xml_file}\n{str(e)}")
        move_files(xml_file, xml_folder, image_folder, target_folder)
    else:
        # Hook: further processing of ``root`` goes here.
        print(f"已处理文件:{xml_file}")


def move_files(xml_file, xml_folder, image_folder, target_folder):
    """Move an XML file and its same-named ``.jpg`` image into *target_folder*.

    Args:
        xml_file: Path of the XML file to move.
        xml_folder: Not used by this function; kept so existing callers
            keep working.
        image_folder: Folder searched for ``<xml base name>.jpg``.
        target_folder: Folder that receives the files. It is created when
            missing.
    """
    if target_folder:
        # Moving to a file path inside a missing directory would fail with
        # FileNotFoundError, so make sure the directory exists first.
        os.makedirs(target_folder, exist_ok=True)

    # Move the XML file.
    xml_filename = os.path.basename(xml_file)
    shutil.move(xml_file, os.path.join(target_folder, xml_filename))

    # Move the matching image, if there is one.
    image_filename = os.path.splitext(xml_filename)[0] + '.jpg'
    image_file = os.path.join(image_folder, image_filename)
    if os.path.exists(image_file):
        shutil.move(image_file, os.path.join(target_folder, image_filename))
        print(f"已移动文件:{xml_file} 和 {image_file}")
    else:
        # Report only what was really moved; no image was found here.
        print(f"已移动文件:{xml_file}(未找到对应图片:{image_file})")


# Placeholder locations: replace all three with real folders.
xml_folder = "/xml/files"  # folder that holds the XML files
image_folder = "/image/files"  # folder that holds the image files
target_folder = "/target/folder"  # folder that receives unparsable files

# Try to parse every XML file found at the top level of xml_folder.
xml_files = glob.glob(os.path.join(xml_folder, "*.xml"))
for xml_file in xml_files:
    process_xml_file(
        xml_file,
        xml_folder=xml_folder,
        image_folder=image_folder,
        target_folder=target_folder,
    )

7.删除xml文件中的乱码文字

import os
import glob
import xml.etree.ElementTree as ET


def _is_invalid_char(char):
    """Return True for U+FFFD and for characters that are neither printable nor whitespace."""
    return char == '\ufffd' or not (char.isprintable() or char.isspace())


def contains_invalid_characters(text):
    """Tell whether *text* contains garbled characters.

    A character counts as garbled when it is the Unicode replacement
    character (U+FFFD, the usual trace of a failed decode) or when it is
    neither printable nor whitespace (control characters, lone surrogates,
    and similar).

    Args:
        text: A ``str``, or ``bytes``/``bytearray`` that will be decoded as
            UTF-8 first.

    Returns:
        True when a garbled character is present, or when the bytes are not
        valid UTF-8; False otherwise.
    """
    if isinstance(text, (bytes, bytearray)):
        try:
            text = text.decode('utf-8')
        except UnicodeDecodeError:
            return True
    return any(_is_invalid_char(char) for char in text)


def _strip_invalid_characters(text):
    """Return *text* without the characters rejected by ``_is_invalid_char``."""
    return ''.join(char for char in text if not _is_invalid_char(char))


def remove_invalid_characters(xml_file):
    """Strip garbled characters from the text content of an XML file.

    The text, tail and attribute values of every element are cleaned
    individually, so the document structure is left intact. The file is
    rewritten (UTF-8) only when something actually changed.

    Args:
        xml_file: Path of the XML file to clean in place.

    Raises:
        xml.etree.ElementTree.ParseError: If *xml_file* is not well-formed.
    """
    tree = ET.parse(xml_file)
    changed = False

    for element in tree.getroot().iter():
        for field in ('text', 'tail'):
            value = getattr(element, field)
            if value and contains_invalid_characters(value):
                setattr(element, field, _strip_invalid_characters(value))
                changed = True
        # list(...) snapshots the items because values are replaced while looping.
        for key, value in list(element.attrib.items()):
            if contains_invalid_characters(value):
                element.set(key, _strip_invalid_characters(value))
                changed = True

    if changed:
        # Save the cleaned document.
        tree.write(xml_file, encoding='utf-8')

        print(f"已删除乱码文字:{xml_file}")
# Clean every XML file found at the top level of the directory.
xml_files = glob.glob("/xml/files/*.xml")  # replace with the directory that holds your XML files
for xml_file in xml_files:
    remove_invalid_characters(xml_file)
  • 0
    点赞
  • 1
    收藏
    觉得还不错? 一键收藏
  • 1
    评论

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论 1
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值