Python文件IO操作涉及文件读写、获取文件后缀名、修改后缀名、获取文件修改时间、压缩文件、加密文件等操作。
Python日期章节,由表示大日期的 calendar、date 模块,逐渐过渡到表示时间刻度更小的 datetime、time 模块,按照此逻辑展开。
Python多线程章节希望通过5个小例子,帮助你对多线程编程模型的本质有更清晰的认识。
-
获取后缀名
import os

# Split a path into (everything before the extension, extension with its dot).
file_next = os.path.splitext("C:/jupyter_notebook/venv_env/json.txt")

# The extension is bound to `ext` rather than `next` so that the builtin
# next() is not shadowed for the rest of the module.
front, ext = file_next
front  # 'C:/jupyter_notebook/venv_env/json'
ext  # '.txt'
-
文件读操作
import os


def mkdir(path):
    """Create the directory ``path`` unless something already exists there.

    Only the last path component is created (``os.mkdir``), so the parent
    directory must already exist.
    """
    if not os.path.exists(path):
        os.mkdir(path)


def openfile(filename):
    """Return the whole content of ``filename`` as a string.

    The file is opened in text mode with the platform default encoding.
    The ``with`` block guarantees the handle is closed even if ``read()``
    raises.
    """
    with open(filename) as f:
        return f.read()
-
文件写操作
# Writing text to a file.

# example1
# Mode "w": truncate the file if it exists, otherwise create it.
f = open(r"C:/jupyter_notebook/venv_env/test1.txt", "w", encoding="utf-8")
print(f.write("测试⽂件写⼊"))  # write() returns the number of characters written
f.close()

# example2
# Mode "a": append to the end of the file if it exists, otherwise create it.
f = open(r"C:/jupyter_notebook/venv_env/test1.txt", "a", encoding="utf-8")
print(f.write("测试⽂件写⼊"))
# close() must actually be called; a bare `f.close` only references the
# method and leaves the file open with its buffer unflushed.
f.close()

# example3
# The with statement closes the file automatically, even if the body raises.
with open(r"C:/jupyter_notebook/venv_env/test1.txt", "w") as f:
    f.write("hello world!")
-
路径中的文件名
import os

# os.path.split returns a (directory, final component) pair.
file_ext = os.path.split('C:/jupyter_notebook/venv_env/test1.txt')
ipath = file_ext[0]  # 'C:/jupyter_notebook/venv_env'
file = file_ext[1]  # 'test1.txt'
-
批量修改文件后缀
本例子使用Python的 os 模块和 argparse 模块,将工作目录 work_dir 下所有后缀名为 old_ext 的文件修改为后缀名为 new_ext。
import argparse
import os


def get_parser():
    """Build the command-line parser.

    Three positional arguments are declared: WORK_DIR, OLD_EXT and NEW_EXT.
    Each uses ``nargs=1``, so every parsed value is a one-element list.
    """
    parser = argparse.ArgumentParser(
        description='⼯作⽬录中⽂件后缀名修改')
    parser.add_argument('work_dir', metavar='WORK_DIR', type=str, nargs=1,
                        help='修改后缀名的⽂件⽬录')
    parser.add_argument('old_ext', metavar='OLD_EXT',
                        type=str, nargs=1, help='原来的后缀')
    parser.add_argument('new_ext', metavar='NEW_EXT',
                        type=str, nargs=1, help='新的后缀')
    # Return the parser object itself (the name `parse` does not exist).
    return parser


def batch_rename(work_dir, old_ext, new_ext):
    """Rename every entry in ``work_dir`` whose extension equals ``old_ext``.

    ``old_ext`` and ``new_ext`` are expected to include the leading dot.
    Only direct children of ``work_dir`` are considered. When done, a
    completion message and the resulting directory listing are printed.
    """
    for filename in os.listdir(work_dir):
        stem, file_ext = os.path.splitext(filename)
        if file_ext != old_ext:
            continue
        os.rename(
            os.path.join(work_dir, filename),
            os.path.join(work_dir, stem + new_ext)
        )
    print("完成重命名")
    print(os.listdir(work_dir))


def _with_leading_dot(ext):
    """Return ``ext`` prefixed with '.' unless it already starts with one."""
    # startswith() is safe for an empty string, unlike indexing ext[0].
    return ext if ext.startswith('.') else '.' + ext


def main():
    """Parse the command line and run the batch rename."""
    parser = get_parser()
    args = vars(parser.parse_args())
    # nargs=1 wraps each value in a list, hence the [0].
    work_dir = args['work_dir'][0]
    old_ext = _with_leading_dot(args['old_ext'][0])
    new_ext = _with_leading_dot(args['new_ext'][0])
    batch_rename(work_dir, old_ext, new_ext)
-
xls批量转换成xlsx
import os


def xls_to_xlsx(work_dir):
    """Rename every ``.xls`` entry directly inside ``work_dir`` to ``.xlsx``.

    Only the file name is changed via ``os.rename``; the file content is not
    touched. Afterwards a completion message and the directory listing are
    printed.
    """
    source_suffix = '.xls'
    target_suffix = '.xlsx'
    for entry in os.listdir(work_dir):
        stem, suffix = os.path.splitext(entry)
        if suffix != source_suffix:
            continue
        renamed = stem + target_suffix
        os.rename(os.path.join(work_dir, entry),
                  os.path.join(work_dir, renamed))
    print("完成重命名")
    print(os.listdir(work_dir))


xls_to_xlsx('./data')
# Sample output:
# ['cut_words.csv', 'email_list.xlsx', 'email_test.docx', 'email_test.jpg',
#  'email_test.xlsx', 'geo_data.png', 'geo_data.xlsx', 'iotest.txt',
#  'pyside2.md', 'PySimpleGUI-4.7.1-py3-none-any.whl', 'test.txt',
#  'test_excel.xlsx', 'ziptest', 'ziptest.zip']
-
定制文件不同行
比较两个文件在哪些行内容不同,返回这些行的编号,行号从1开始。
def statLineCnt(statfile):
    """Print the file name and return the number of lines in ``statfile``.

    The file is read as UTF-8 text.
    """
    print('⽂件名:'+statfile)
    with open(statfile, encoding='utf-8') as f:
        return sum(1 for _ in f)


def diff(more, cnt, less):
    """Return the 1-based numbers of the lines on which two files differ.

    Args:
        more: path of the file that has at least as many lines.
        cnt: number of lines in ``more``.
        less: path of the file with fewer (or equally many) lines.

    Lines are compared after ``strip()``, so surrounding whitespace and the
    trailing newline are ignored. Every line of ``more`` beyond the end of
    ``less`` is reported as different.
    """
    diff_nos = []
    # Stays 0 when `less` is empty, so that case reports every line of `more`
    # instead of failing on an unbound loop variable.
    line_no = 0
    with open(less, encoding='utf-8') as short_f, \
            open(more, encoding='utf-8') as long_f:
        for line_no, short_line in enumerate(short_f, start=1):
            if short_line.strip() != long_f.readline().strip():
                diff_nos.append(line_no)
    # Lines that exist only in the longer file.
    diff_nos.extend(range(line_no + 1, cnt + 1))
    return diff_nos


def file_diff_line_nos(fileA, fileB):
    """Return the 1-based line numbers on which ``fileA`` and ``fileB`` differ.

    The shorter file is walked line by line against the longer one. Trailing
    blank lines at the end of a file get no special treatment. On any error
    (for example a missing file) the exception is printed and ``None`` is
    returned.
    """
    try:
        cntA = statLineCnt(fileA)
        cntB = statLineCnt(fileB)
        if cntA > cntB:
            return diff(fileA, cntA, fileB)
        return diff(fileB, cntB, fileA)
    except Exception as e:
        # Deliberate best-effort: report the problem instead of raising.
        print(e)
        return None


if __name__ == '__main__':
    import os
    print(os.getcwd())
    # Example: if a.txt holds the five lines
    #     hello world!!!!
    #     nice to meet you
    #     yes
    #     no1
    #     jack
    # and b.txt holds only the first three of them (the third one with a
    # trailing space), lines 4 and 5 are reported.
    # The result is bound to `result`, not `diff`, so that the diff()
    # function above is not shadowed.
    result = file_diff_line_nos('./testdir/a.txt', './testdir/b.txt')
    print(result)  # [4, 5]
-
获取指定后缀名文件
import os


def find_file(work_dir, extension='jpg'):
    """Return the names of entries in ``work_dir`` that have ``extension``.

    Args:
        work_dir: directory to list; sub-directories are not searched.
        extension: wanted extension, with or without the leading dot
            ('md' and '.md' are equivalent).

    Each entry name is printed as it is examined. Matching is exact and
    case-sensitive.
    """
    # Normalise so a caller passing '.md' does not end up comparing '..md'.
    wanted = extension if extension.startswith('.') else '.' + extension
    lst = []
    for filename in os.listdir(work_dir):
        print(filename)
        if os.path.splitext(filename)[1] == wanted:
            lst.append(filename)
    return lst


r = find_file('.', 'md')
print(r)  # the .md files located directly in the current directory
-
批量获取文件修改时间
# Report the modification time of files under a directory.
import os
from datetime import datetime

print(f"当前时间:{datetime.now().strftime('%Y-%m-%d %H:%M:%S')}")


def get_modify_time(indir, max_days=20):
    """Print files under ``indir`` modified fewer than ``max_days`` days ago.

    Args:
        indir: root directory; it is walked recursively with ``os.walk``.
        max_days: a file is reported only when the whole-day part of its age
            is strictly less than this value. Defaults to 20.

    For every reported file the path, its modification time and its age
    (days, hours, minutes) are printed. Nothing is returned.
    """
    for root, _, files in os.walk(indir):
        for file in files:
            absfile = os.path.join(root, file)
            modtime = datetime.fromtimestamp(os.path.getmtime(absfile))
            difftime = datetime.now() - modtime
            # Keep only recently modified files (age below the threshold).
            if difftime.days < max_days:
                print(f"""{absfile} 修改时间[{modtime.strftime('%Y-%m-%d %H:%M:%S')}] 距今[{difftime.days:3d}天{difftime.seconds//3600:2d}时 {difftime.seconds%3600//60:2d}]""")


get_modify_time('./data')
-
批量压缩文件
import zipfile  # standard-library module for creating and extracting zip archives
import os
import time


def batch_zip(start_dir):
    """Compress the directory tree ``start_dir`` into ``start_dir + '.zip'``.

    Args:
        start_dir: path of the directory to compress.

    Returns:
        The path of the archive that was written.

    Archive member names are relative to ``start_dir``, so the archive does
    not reproduce the path leading up to it.
    """
    file_news = start_dir + '.zip'
    # The with block closes the archive even if walking or writing fails.
    with zipfile.ZipFile(file_news, 'w', zipfile.ZIP_DEFLATED) as z:
        for dir_path, _dir_names, file_names in os.walk(start_dir):
            # relpath strips only the leading start_dir; a plain str.replace
            # would also remove later occurrences of the same text.
            rel_dir = os.path.relpath(dir_path, start_dir)
            for filename in file_names:
                if rel_dir == os.curdir:
                    arcname = filename
                else:
                    arcname = os.path.join(rel_dir, filename)
                z.write(os.path.join(dir_path, filename), arcname)
    return file_news


batch_zip('./data/ziptest')
-
32位加密
import hashlib


def hash_cry32(s):
    """Return the 32-character hexadecimal MD5 digest of ``str(s)``.

    ``s`` is converted with ``str()`` and encoded as UTF-8 before hashing.
    """
    payload = str(s).encode('utf-8')
    return hashlib.md5(payload).hexdigest()


print(hash_cry32(1))  # c4ca4238a0b923820dcc509a6f75849b
print(hash_cry32('hello'))  # 5d41402abc4b2a76b9719d911017c592
-
年的日历图
import calendar
from datetime import date

mydate = date.today()
# Render the calendar for the same year that the heading below prints;
# a hard-coded year would disagree with the heading in any other year.
year_calendar_str = calendar.calendar(mydate.year)
print(f"{mydate.year}年的⽇历图:{year_calendar_str}\n")
2022年的⽇历图: 2022 January February March Mo Tu We Th Fr Sa Su Mo Tu We Th Fr Sa Su Mo Tu We Th Fr Sa Su 1 2 1 2 3 4 5 6 1 2 3 4 5 6 3 4 5 6 7 8 9 7 8 9 10 11 12 13 7 8 9 10 11 12 13 10 11 12 13 14 15 16 14 15 16 17 18 19 20 14 15 16 17 18 19 20 17 18 19 20 21 22 23 21 22 23 24 25 26 27 21 22 23 24 25 26 27 24 25 26 27 28 29 30 28 28 29 30 31 31 April May June Mo Tu We Th Fr Sa Su Mo Tu We Th Fr Sa Su Mo Tu We Th Fr Sa Su 1 2 3 1 1 2 3 4 5 4 5 6 7 8 9 10 2 3 4 5 6 7 8 6 7 8 9 10 11 12 11 12 13 14 15 16 17 9 10 11 12 13 14 15 13 14 15 16 17 18 19 18 19 20 21 22 23 24 16 17 18 19 20 21 22 20 21 22 23 24 25 26 25 26 27 28 29 30 23 24 25 26 27 28 29 27 28 29 30 30 31 July August September Mo Tu We Th Fr Sa Su Mo Tu We Th Fr Sa Su Mo Tu We Th Fr Sa Su 1 2 3 1 2 3 4 5 6 7 1 2 3 4 4 5 6 7 8 9 10 8 9 10 11 12 13 14 5 6 7 8 9 10 11 11 12 13 14 15 16 17 15 16 17 18 19 20 21 12 13 14 15 16 17 18 18 19 20 21 22 23 24 22 23 24 25 26 27 28 19 20 21 22 23 24 25 25 26 27 28 29 30 31 29 30 31 26 27 28 29 30 October November December Mo Tu We Th Fr Sa Su Mo Tu We Th Fr Sa Su Mo Tu We Th Fr Sa Su 1 2 1 2 3 4 5 6 1 2 3 4 3 4 5 6 7 8 9 7 8 9 10 11 12 13 5 6 7 8 9 10 11 10 11 12 13 14 15 16 14 15 16 17 18 19 20 12 13 14 15 16 17 18 17 18 19 20 21 22 23 21 22 23 24 25 26 27 19 20 21 22 23 24 25 24 25 26 27 28 29 30 28 29 30 26 27 28 29 30 31 31
-
判断是否为闰年
import calendar
from datetime import date

mydate = date.today()
is_leap = calendar.isleap(mydate.year)

# Choose the message template that matches the leap-year check.
if is_leap:
    print_leap_str = "%s年是闰年"
else:
    print_leap_str = "%s年不是闰年\n"

print(print_leap_str % mydate.year)  # e.g. 2022年不是闰年
-
当月的日历图
import calendar
from datetime import date

mydate = date.today()
# Text calendar for the month that contains today's date.
year, month = mydate.year, mydate.month
month_calendar_str = calendar.month(year, month)
print(f"{mydate.year}年-{mydate.month}⽉的⽇历图:{month_calendar_str}\n")
2022年-7⽉的⽇历图: July 2022 Mo Tu We Th Fr Sa Su 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31