Python 中的获取路径和文件读取
获取路径
#!/usr/bin/env python3
# -*- coding: UTF-8 -*-
import os
def get_files_and_directories(path):
    """Return the names of all files and directories directly under *path*.

    Non-recursive: entries inside nested directories are not included.
    """
    return os.listdir(path)
def get_files_and_directories_path(path):
    """Return the paths of all non-hidden entries directly under *path*.

    Entries whose name starts with '.' are skipped.  Paths are built from
    *path*, so they are absolute only when *path* itself is absolute.
    Non-recursive.
    """
    with os.scandir(path) as entries:
        return [entry.path for entry in entries if not entry.name.startswith('.')]
def get_files(path):
    """Return the names of all regular files directly under *path*.

    Directories are skipped.  Non-recursive.
    """
    with os.scandir(path) as entries:
        return [entry.name for entry in entries if entry.is_file()]
def get_files_path(path):
    """Return the paths of all regular files directly under *path*.

    Paths are built from *path* (absolute only if *path* is).  Non-recursive.
    """
    with os.scandir(path) as entries:
        return [entry.path for entry in entries if entry.is_file()]
def get_directories(path):
    """Return the names of all directories directly under *path*.

    Files are skipped.  Non-recursive.
    """
    with os.scandir(path) as entries:
        return [entry.name for entry in entries if entry.is_dir()]
def get_directories_path(path):
    """Return the paths of all directories directly under *path*.

    Paths are built from *path* (absolute only if *path* is).  Non-recursive.
    """
    with os.scandir(path) as entries:
        return [entry.path for entry in entries if entry.is_dir()]
def get_files_deep_more(path_):
    """Collect file names under *path_* recursively, one sub-list per directory.

    Returns a list of lists: every directory visited (including *path_* itself)
    contributes one list containing the names of the plain files directly
    inside it.  Sub-directory lists are appended before their parent's list,
    because recursion completes while the parent is still being scanned.

    Fix: descend via ``entry.path`` instead of the fragile manual string
    concatenation ``str(path).rstrip("/") + "/" + entry.name``, which
    hard-coded '/' as the separator.
    """
    deep_files = []

    def _walk(path):
        files = []
        with os.scandir(path) as entries:
            for entry in entries:
                if entry.is_dir():
                    # Recurse first: child lists land in deep_files before
                    # this directory's own list (original ordering kept).
                    _walk(entry.path)
                elif entry.is_file():
                    files.append(entry.name)
        deep_files.append(files)

    _walk(path_)
    return deep_files
def get_files_deep(path_):
    """Collect the names of all files under *path_*, recursively, as one flat list.

    Fix: descend via ``entry.path`` instead of the fragile manual string
    concatenation ``str(path).rstrip("/") + "/" + entry.name``, which
    hard-coded '/' as the separator.
    """
    deep_files = []

    def _walk(path):
        with os.scandir(path) as entries:
            for entry in entries:
                if entry.is_dir():
                    _walk(entry.path)
                elif entry.is_file():
                    deep_files.append(entry.name)

    _walk(path_)
    return deep_files
def get_files_deep_path(path_):
    """Collect the paths of all files under *path_*, recursively, as one flat list.

    Paths are built from *path_* (absolute only if *path_* is).

    Fix: descend via ``entry.path`` instead of the fragile manual string
    concatenation ``str(path).rstrip("/") + "/" + entry.name``, which
    hard-coded '/' as the separator.
    """
    deep_files = []

    def _walk(path):
        with os.scandir(path) as entries:
            for entry in entries:
                if entry.is_dir():
                    _walk(entry.path)
                elif entry.is_file():
                    deep_files.append(entry.path)

    _walk(path_)
    return deep_files
def get_directories_deep_more(path_):
    """Collect directory names under *path_* recursively, one sub-list per directory.

    Returns a list of lists: every directory visited (including *path_*)
    contributes one list with the names of the directories directly inside it
    (possibly empty).  Child lists are appended before their parent's list,
    because recursion completes while the parent is still being scanned.

    Fix: descend via ``entry.path`` instead of the fragile manual string
    concatenation with ``rstrip("/") + "/"``.
    """
    deep_files = []

    def _walk(path):
        names = []
        with os.scandir(path) as entries:
            for entry in entries:
                if entry.is_dir():
                    names.append(entry.name)
                    _walk(entry.path)  # child list appended before ours
        deep_files.append(names)

    _walk(path_)
    return deep_files
def get_directories_deep(path_):
    """Collect the names of all directories under *path_*, recursively, flat.

    Each directory's own sub-directory names are gathered locally and merged
    into the result after its scan finishes, so deeper names appear before
    their parent's batch (original ordering kept).

    Fix: descend via ``entry.path`` instead of the fragile manual string
    concatenation with ``rstrip("/") + "/"``.
    """
    deep_files = []

    def _walk(path):
        names = []
        with os.scandir(path) as entries:
            for entry in entries:
                if entry.is_dir():
                    names.append(entry.name)
                    _walk(entry.path)
        deep_files.extend(names)

    _walk(path_)
    return deep_files
def get_directories_deep_path(path_):
    """Collect the paths of all directories under *path_*, recursively, flat.

    (The original header said "names" — this variant returns paths, built
    from *path_*, so absolute only if *path_* is.)

    Fix: descend via ``entry.path`` instead of the fragile manual string
    concatenation with ``rstrip("/") + "/"``.
    """
    deep_files = []

    def _walk(path):
        paths = []
        with os.scandir(path) as entries:
            for entry in entries:
                if entry.is_dir():
                    paths.append(entry.path)
                    _walk(entry.path)
        deep_files.extend(paths)

    _walk(path_)
    return deep_files
def get_files_and_directories_deep(path):
    """Walk the whole tree under *path* and return (file_names, directory_names).

    Only the bare entry names are collected, not their paths.
    """
    all_files = []
    all_dirs = []
    for _root, dirs, files in os.walk(path, topdown=False):
        all_files.extend(str(name) for name in files)
        all_dirs.extend(str(name) for name in dirs)
    return all_files, all_dirs
def get_files_and_directories_deep_path(path):
    """Walk the whole tree under *path* and return (file_paths, directory_paths).

    Paths are joined onto the walk root, so they are absolute only when
    *path* itself is absolute.
    """
    all_files = []
    all_dirs = []
    for root, dirs, files in os.walk(path, topdown=False):
        all_files.extend(os.path.join(root, name) for name in files)
        all_dirs.extend(os.path.join(root, name) for name in dirs)
    return all_files, all_dirs
文件读取
#!/usr/bin/env python3
# -*- coding: UTF-8 -*-
import pandas as pd
def read_file(*files, sep='\t', encoding="utf-8", orient="records", lines=True, header=None, low_memory=False):
    """Load each given file into a DataFrame, dispatching on its extension.

    Supported: .txt (delimited by *sep*), .csv (always comma-delimited),
    .xls/.xlsx, .html/.htm, and .json (with *orient*/*lines*).  Files with an
    unrecognized extension are silently skipped.

    Returns a list of DataFrames, one per recognized file.

    Fixes: ``pd.read_excel`` does not accept ``sep``/``encoding``/
    ``low_memory`` — passing them raises TypeError on pandas >= 1.0, so they
    are dropped; extension checks use ``endswith`` with a tuple; stray debug
    ``print`` calls removed.
    """
    frames = []
    for file in files:
        name = str(file)
        if name.endswith(".txt"):
            frames.append(pd.read_table(file, sep=sep, header=header, encoding=encoding, low_memory=low_memory))
        elif name.endswith(".csv"):
            frames.append(pd.read_csv(file, sep=',', header=header, encoding=encoding, low_memory=low_memory))
        elif name.endswith((".xls", ".xlsx")):
            frames.append(pd.read_excel(file, header=header))
        elif name.endswith((".html", ".htm")):
            # read_html returns a list of tables; wrapping in DataFrame keeps
            # the original return shape.
            frames.append(pd.DataFrame(pd.read_html(file, header=header, encoding=encoding)))
        elif name.endswith(".json"):
            frames.append(pd.read_json(file, orient=orient, lines=lines, encoding=encoding))
    return frames
def read_file_line_one(*files, sep="\t", end="\n", encoding="utf-8"):
    """Read only the first line of each file.

    Returns a dict mapping each file path to the list produced by stripping
    a trailing *end* from its first line and splitting on *sep* (e.g. a
    header row of column names).
    """
    first_lines = {}
    for file in files:
        with open(file, 'r', encoding=encoding) as handle:
            first_lines[file] = handle.readline().rstrip(end).split(sep)
    return first_lines
def write_file(path, content, mode='w', encoding="utf-8"):
    """Write *content* to *path* in a single call.

    The default mode 'w' truncates, so re-running with the same path
    replaces the previous contents.
    """
    with open(path, mode, encoding=encoding) as handle:
        handle.write(content)
def write_file_line(path, content, line='\n', mode='a', encoding="utf-8"):
    """Write each element of *content* to *path*, terminating each with *line*.

    The default mode 'a' appends, so repeated calls accumulate.

    Fix: the file is now opened once instead of once per element.  Besides
    avoiding needless reopen overhead, this makes mode='w' behave sanely —
    previously every iteration re-truncated the file, so only the last
    element survived.
    """
    with open(path, mode, encoding=encoding) as handle:
        for item in content:
            handle.write(item + line)
def read_file_line(path, mode='r', encoding="utf-8"):
    """Read *path* line by line, keeping line endings.

    Mirrors the original readline loop exactly: the empty-string sentinel
    returned at EOF is included, so the result always ends with ''.
    """
    with open(path, mode, encoding=encoding) as handle:
        content = handle.readlines()
        content.append(handle.readline())  # EOF sentinel kept for parity
    return content
def file_concat_inner(*files, sep='\t', encoding="utf-8", header=0):
    """Concatenate the given files row-wise, keeping only their common columns.

    Files are loaded via read_file(); the result is a single DataFrame with
    a fresh integer index.
    """
    frames = read_file(*files, sep=sep, encoding=encoding, header=header)
    return pd.concat(frames, join="inner", ignore_index=True)
def file_merge_outer(*files, sep='\t', encoding="utf-8", header=0):
    """Outer-merge all given files into one DataFrame.

    Files are loaded via read_file().  Returns None when nothing could be
    read, a copy of the single frame when only one was read, and otherwise
    the successive ``pd.merge(..., how='outer')`` of all frames.

    Idiom fix: ``x.__len__()`` replaced with truthiness / slicing; the
    single- and multi-frame branches are merged (a loop over an empty slice
    is a no-op).
    """
    frames = read_file(*files, sep=sep, encoding=encoding, header=header)
    if not frames:
        return None
    merged = pd.DataFrame(frames[0])
    for frame in frames[1:]:
        merged = pd.merge(merged, pd.DataFrame(frame), how='outer')
    return merged
def file_concat_inner_output(*files, output_file, sep='\t', r_encoding="utf-8", header=0, index=False,
                             o_encoding="utf_8_sig"):
    """Concatenate files on their common columns and write the result as CSV.

    Reading uses *r_encoding*; the output is written with *o_encoding*
    (utf_8_sig by default, i.e. with a BOM) using *sep* as the delimiter.
    """
    frames = read_file(*files, sep=sep, encoding=r_encoding, header=header)
    combined = pd.concat(frames, join="inner", ignore_index=True)
    pd.DataFrame(combined).to_csv(output_file, encoding=o_encoding, sep=sep, index=index)
def file_merge_outer_output(*files, output_file, sep='\t', r_encoding="utf-8", header=0, index=False,
                            o_encoding="utf_8_sig"):
    """Outer-merge all given files and write the result to *output_file* as CSV.

    Reading uses *r_encoding*; the output is written with *o_encoding*
    (utf_8_sig by default) using *sep* as the delimiter.

    Bug fix: the merge loop previously reused the name ``index`` as its loop
    counter, clobbering the ``index`` keyword argument — with two or more
    files ``to_csv`` then received an integer instead of the caller's flag
    and always wrote the row index.  The loop variable is renamed.
    """
    frames = read_file(*files, sep=sep, encoding=r_encoding, header=header)
    if not frames:
        merged = None
    elif len(frames) == 1:
        merged = pd.DataFrame(frames[0])
    else:
        merged = pd.DataFrame(frames[0])
        for pos in range(1, len(frames)):
            merged = pd.merge(merged, pd.DataFrame(frames[pos]), how='outer')
    pd.DataFrame(merged).to_csv(output_file, encoding=o_encoding, sep=sep, index=index)
def file_r_w_output(*files, output_file, r_mode='r', o_mode='a', encoding="utf-8"):
    """Concatenate *files* into *output_file*, streaming line by line.

    Suited to large files: nothing is held in memory beyond one buffered
    line at a time.

    Fixes: the output file is now opened once for the whole run instead of
    once per input file — with o_mode='w' the old code re-truncated the
    output on every input, keeping only the last file.  The manual
    readline/break loop (which also wrote the final '' sentinel) is replaced
    with buffered file iteration, and the per-file debug print is removed.
    """
    with open(output_file, o_mode, encoding=encoding) as out:
        for file in files:
            with open(file, r_mode, encoding=encoding) as src:
                for line in src:
                    out.write(line)