Python 中的获取路径和文件读取
获取路径
#!/usr/bin/env python3
# -*- coding: UTF-8 -*-
import os
def get_files_and_directories(path):
    """Return the names of all files and directories directly under *path*.

    Non-recursive: entries inside nested directories are not included.
    """
    return os.listdir(path)
def get_files_and_directories_path(path):
    """Return the paths of all non-hidden entries directly under *path*.

    Entries whose name starts with '.' are skipped.  Paths are built from
    *path*, so they are absolute only when *path* itself is absolute.
    Non-recursive.
    """
    with os.scandir(path) as entries:
        return [entry.path for entry in entries if not entry.name.startswith('.')]
def get_files(path):
    """Return the names of all regular files directly under *path*.

    Directories are skipped.  Non-recursive.
    """
    with os.scandir(path) as entries:
        return [entry.name for entry in entries if entry.is_file()]
def get_files_path(path):
    """Return the paths of all regular files directly under *path*.

    Paths are built from *path* (absolute only if *path* is).  Non-recursive.
    """
    with os.scandir(path) as entries:
        return [entry.path for entry in entries if entry.is_file()]
def get_directories(path):
    """Return the names of all directories directly under *path*.

    Files are skipped.  Non-recursive.
    """
    with os.scandir(path) as entries:
        return [entry.name for entry in entries if entry.is_dir()]
def get_directories_path(path):
    """Return the paths of all directories directly under *path*.

    Paths are built from *path* (absolute only if *path* is).  Non-recursive.
    """
    with os.scandir(path) as entries:
        return [entry.path for entry in entries if entry.is_dir()]
def get_files_deep_more(path_):
    """Collect file names under *path_* recursively, one sub-list per directory.

    Returns a list of lists: every directory visited (including *path_* itself)
    contributes one list containing the names of the plain files directly
    inside it.  Sub-directory lists are appended before their parent's list,
    because recursion completes while the parent is still being scanned.

    Fix: descend via ``entry.path`` instead of the fragile manual string
    concatenation ``str(path).rstrip("/") + "/" + entry.name``, which
    hard-coded '/' as the separator.
    """
    deep_files = []

    def _walk(path):
        files = []
        with os.scandir(path) as entries:
            for entry in entries:
                if entry.is_dir():
                    # Recurse first: child lists land in deep_files before
                    # this directory's own list (original ordering kept).
                    _walk(entry.path)
                elif entry.is_file():
                    files.append(entry.name)
        deep_files.append(files)

    _walk(path_)
    return deep_files
def get_files_deep(path_):
    """Collect the names of all files under *path_*, recursively, as one flat list.

    Fix: descend via ``entry.path`` instead of the fragile manual string
    concatenation ``str(path).rstrip("/") + "/" + entry.name``, which
    hard-coded '/' as the separator.
    """
    deep_files = []

    def _walk(path):
        with os.scandir(path) as entries:
            for entry in entries:
                if entry.is_dir():
                    _walk(entry.path)
                elif entry.is_file():
                    deep_files.append(entry.name)

    _walk(path_)
    return deep_files
def get_files_deep_path(path_):
    """Collect the paths of all files under *path_*, recursively, as one flat list.

    Paths are built from *path_* (absolute only if *path_* is).

    Fix: descend via ``entry.path`` instead of the fragile manual string
    concatenation ``str(path).rstrip("/") + "/" + entry.name``, which
    hard-coded '/' as the separator.
    """
    deep_files = []

    def _walk(path):
        with os.scandir(path) as entries:
            for entry in entries:
                if entry.is_dir():
                    _walk(entry.path)
                elif entry.is_file():
                    deep_files.append(entry.path)

    _walk(path_)
    return deep_files
def get_directories_deep_more(path_):
    """Collect directory names under *path_* recursively, one sub-list per directory.

    Returns a list of lists: every directory visited (including *path_*)
    contributes one list with the names of the directories directly inside it
    (possibly empty).  Child lists are appended before their parent's list,
    because recursion completes while the parent is still being scanned.

    Fix: descend via ``entry.path`` instead of the fragile manual string
    concatenation with ``rstrip("/") + "/"``.
    """
    deep_files = []

    def _walk(path):
        names = []
        with os.scandir(path) as entries:
            for entry in entries:
                if entry.is_dir():
                    names.append(entry.name)
                    _walk(entry.path)  # child list appended before ours
        deep_files.append(names)

    _walk(path_)
    return deep_files
def get_directories_deep(path_):
    """Collect the names of all directories under *path_*, recursively, flat.

    Each directory's own sub-directory names are gathered locally and merged
    into the result after its scan finishes, so deeper names appear before
    their parent's batch (original ordering kept).

    Fix: descend via ``entry.path`` instead of the fragile manual string
    concatenation with ``rstrip("/") + "/"``.
    """
    deep_files = []

    def _walk(path):
        names = []
        with os.scandir(path) as entries:
            for entry in entries:
                if entry.is_dir():
                    names.append(entry.name)
                    _walk(entry.path)
        deep_files.extend(names)

    _walk(path_)
    return deep_files
def get_directories_deep_path(path_):
    """Collect the paths of all directories under *path_*, recursively, flat.

    (The original header said "names" — this variant returns paths, built
    from *path_*, so absolute only if *path_* is.)

    Fix: descend via ``entry.path`` instead of the fragile manual string
    concatenation with ``rstrip("/") + "/"``.
    """
    deep_files = []

    def _walk(path):
        paths = []
        with os.scandir(path) as entries:
            for entry in entries:
                if entry.is_dir():
                    paths.append(entry.path)
                    _walk(entry.path)
        deep_files.extend(paths)

    _walk(path_)
    return deep_files
def get_files_and_directories_deep(path):
    """Walk the whole tree under *path* and return (file_names, directory_names).

    Only the bare entry names are collected, not their paths.
    """
    all_files = []
    all_dirs = []
    for _root, dirs, files in os.walk(path, topdown=False):
        all_files.extend(str(name) for name in files)
        all_dirs.extend(str(name) for name in dirs)
    return all_files, all_dirs
def get_files_and_directories_deep_path(path):
    """Walk the whole tree under *path* and return (file_paths, directory_paths).

    Paths are joined onto the walk root, so they are absolute only when
    *path* itself is absolute.
    """
    all_files = []
    all_dirs = []
    for root, dirs, files in os.walk(path, topdown=False):
        all_files.extend(os.path.join(root, name) for name in files)
        all_dirs.extend(os.path.join(root, name) for name in dirs)
    return all_files, all_dirs
文件读取
#!/usr/bin/env python3
# -*- coding: UTF-8 -*-
import pandas as pd
def read_file(*files, sep='\t', encoding="utf-8", orient="records", lines=True, header=None, low_memory=False):
    """Load each given file into a DataFrame, dispatching on its extension.

    Supported: .txt (delimited by *sep*), .csv (always comma-delimited),
    .xls/.xlsx, .html/.htm, and .json (with *orient*/*lines*).  Files with an
    unrecognized extension are silently skipped.

    Returns a list of DataFrames, one per recognized file.

    Fixes: ``pd.read_excel`` does not accept ``sep``/``encoding``/
    ``low_memory`` — passing them raises TypeError on pandas >= 1.0, so they
    are dropped; extension checks use ``endswith`` with a tuple; stray debug
    ``print`` calls removed.
    """
    frames = []
    for file in files:
        name = str(file)
        if name.endswith(".txt"):
            frames.append(pd.read_table(file, sep=sep, header=header, encoding=encoding, low_memory=low_memory))
        elif name.endswith(".csv"):
            frames.append(pd.read_csv(file, sep=',', header=header, encoding=encoding, low_memory=low_memory))
        elif name.endswith((".xls", ".xlsx")):
            frames.append(pd.read_excel(file, header=header))
        elif name.endswith((".html", ".htm")):
            # read_html returns a list of tables; wrapping in DataFrame keeps
            # the original return shape.
            frames.append(pd.DataFrame(pd.read_html(file, header=header, encoding=encoding)))
        elif name.endswith(".json"):
            frames.append(pd.read_json(file, orient=orient, lines=lines, encoding=encoding))
    return frames
def read_file_line_one(*files, sep="\t", end="\n", encoding="utf-8"):
    """Read only the first line of each file.

    Returns a dict mapping each file path to the list produced by stripping
    a trailing *end* from its first line and splitting on *sep* (e.g. a
    header row of column names).
    """
    first_lines = {}
    for file in files:
        with open(file, 'r', encoding=encoding) as handle:
            first_lines[file] = handle.readline().rstrip(end).split(sep)
    return first_lines
def write_file(path, content, mode='w', encoding="utf-8"):
    """Write *content* to *path* in a single call.

    The default mode 'w' truncates, so re-running with the same path
    replaces the previous contents.
    """
    with open(path, mode, encoding=encoding) as handle:
        handle.write(content)
def write_file_line(path, content, line='\n', mode='a', encoding="utf-8"):
    """Write each element of *content* to *path*, terminating each with *line*.

    The default mode 'a' appends, so repeated calls accumulate.

    Fix: the file is now opened once instead of once per element.  Besides
    avoiding needless reopen overhead, this makes mode='w' behave sanely —
    previously every iteration re-truncated the file, so only the last
    element survived.
    """
    with open(path, mode, encoding=encoding) as handle:
        for item in content:
            handle.write(item + line)
def read_file_line(path, mode='r', encoding="utf-8"):
    """Read *path* line by line, keeping line endings.

    Mirrors the original readline loop exactly: the empty-string sentinel
    returned at EOF is included, so the result always ends with ''.
    """
    with open(path, mode, encoding=encoding) as handle:
        content = handle.readlines()
        content.append(handle.readline())  # EOF sentinel kept for parity
    return content
def file_concat_inner(*files, sep='\t', encoding="utf-8", header=0):
    """Concatenate the given files row-wise, keeping only their common columns.

    Files are loaded via read_file(); the result is a single DataFrame with
    a fresh integer index.
    """
    frames = read_file(*files, sep=sep, encoding=encoding, header=header)
    return pd.concat(frames, join="inner", ignore_index=True)
def file_merge_outer(*files, sep='\t', encoding="utf-8", header=0):
    """Outer-merge all given files into one DataFrame.

    Files are loaded via read_file().  Returns None when nothing could be
    read, a copy of the single frame when only one was read, and otherwise
    the successive ``pd.merge(..., how='outer')`` of all frames.

    Idiom fix: ``x.__len__()`` replaced with truthiness / slicing; the
    single- and multi-frame branches are merged (a loop over an empty slice
    is a no-op).
    """
    frames = read_file(*files, sep=sep, encoding=encoding, header=header)
    if not frames:
        return None
    merged = pd.DataFrame(frames[0])
    for frame in frames[1:]:
        merged = pd.merge(merged, pd.DataFrame(frame), how='outer')
    return merged
def file_concat_inner_output(*files, output_file, sep='\t', r_encoding="utf-8", header=0, index=False,
                             o_encoding="utf_8_sig"):
    """Concatenate files on their common columns and write the result as CSV.

    Reading uses *r_encoding*; the output is written with *o_encoding*
    (utf_8_sig by default, i.e. with a BOM) using *sep* as the delimiter.
    """
    frames = read_file(*files, sep=sep, encoding=r_encoding, header=header)
    combined = pd.concat(frames, join="inner", ignore_index=True)
    pd.DataFrame(combined).to_csv(output_file, encoding=o_encoding, sep=sep, index=index)
def file_merge_outer_output(*files, output_file, sep='\t', r_encoding="utf-8", header=0, index=False,
                            o_encoding="utf_8_sig"):
    """Outer-merge all given files and write the result to *output_file* as CSV.

    Reading uses *r_encoding*; the output is written with *o_encoding*
    (utf_8_sig by default) using *sep* as the delimiter.

    Bug fix: the merge loop previously reused the name ``index`` as its loop
    counter, clobbering the ``index`` keyword argument — with two or more
    files ``to_csv`` then received an integer instead of the caller's flag
    and always wrote the row index.  The loop variable is renamed.
    """
    frames = read_file(*files, sep=sep, encoding=r_encoding, header=header)
    if not frames:
        merged = None
    elif len(frames) == 1:
        merged = pd.DataFrame(frames[0])
    else:
        merged = pd.DataFrame(frames[0])
        for pos in range(1, len(frames)):
            merged = pd.merge(merged, pd.DataFrame(frames[pos]), how='outer')
    pd.DataFrame(merged).to_csv(output_file, encoding=o_encoding, sep=sep, index=index)
def file_r_w_output(*files, output_file, r_mode='r', o_mode='a', encoding="utf-8"):
    """Concatenate *files* into *output_file*, streaming line by line.

    Suited to large files: nothing is held in memory beyond one buffered
    line at a time.

    Fixes: the output file is now opened once for the whole run instead of
    once per input file — with o_mode='w' the old code re-truncated the
    output on every input, keeping only the last file.  The manual
    readline/break loop (which also wrote the final '' sentinel) is replaced
    with buffered file iteration, and the per-file debug print is removed.
    """
    with open(output_file, o_mode, encoding=encoding) as out:
        for file in files:
            with open(file, r_mode, encoding=encoding) as src:
                for line in src:
                    out.write(line)