找到指定后缀的文件列表
找到指定后缀的文件,返回找到的文件路径列表,会递归文件夹。
import os
# Find all files with a given suffix.
def find_type(path:str,fix:str):
    """Recursively collect files under *path* whose name ends with *fix*.

    Args:
        path: Directory to search; sub-directories are searched too.
        fix: File-name suffix to match, e.g. ".txt".

    Returns:
        list[str]: Full paths of every matching file.
    """
    file_list=[]
    for name in os.listdir(path):
        full=os.path.join(path,name)
        if os.path.isdir(full):
            # Merge matches found in the sub-directory.
            file_list+=find_type(full,fix)
        elif full.endswith(fix):
            # endswith() also behaves sanely for fix=="" (matches every
            # file), unlike the original slice test ps[-len(fix):]==fix,
            # which could never match anything for an empty suffix.
            file_list.append(full)
    return file_list
转换文件编码
示例为把gb2312编码的文件转化为utf8编码。
def conv(file:str):
    """Convert *file* from gb2312 encoding to utf-8, in place.

    Best effort: on any read/decode/write failure a message is printed
    and the function returns without raising.
    """
    try:
        with open(file,encoding="gb2312") as f:
            s=f.read()
        # Mode "w" truncates the file, so the original os.remove() call
        # was redundant -- and skipping it means the data still exists on
        # disk if the read/decode above fails.
        with open(file,mode="w",encoding="utf-8") as f:
            f.write(s)
    except (OSError,UnicodeError):
        # Narrowed from a bare Exception: only I/O and codec failures are
        # expected here; anything else should surface as a bug.
        print("conv failed",file)
删除文件注释
输入文件名,行注释标签,块注释标签,生成删除注释后的文件保存并覆盖原文件。
例如C语言使用 // 和 /* */ 来注释,调用方式如下:
del_comm("main.c","//",["/*","*/"])
# Remove all comments
def del_comm(file:str,line_comm:str,blok_comm:list[str]):
    """Strip comments from *file* in place (read and rewritten as utf-8).

    file: path of the source file to rewrite.
    line_comm: line-comment token, e.g. "//".
    blok_comm: [start, end] block-comment tokens, e.g. ["/*", "*/"].

    NOTE(review): comment tokens are matched textually, so tokens that
    appear inside string literals are treated as real comments.
    """
    text=""
    try:
        with open(file,encoding="utf-8") as f:
            lines=f.readlines()
    except Exception as e:
        # Unreadable / non-utf-8 file: leave it untouched.
        print("decode failed",file)
        return
    # Pass 1: drop line comments; drop lines that become blank.
    for i in range(len(lines)):
        index=lines[i].find(line_comm)
        if(index>=0):
            lstr=lines[i][:index]   # keep only the code before the token
        else:
            lstr=lines[i].rstrip()
        if(len(lstr.strip())>0):
            text+=lstr+'\n'
        elif(text[-2:]=='\\\n'):
            # The previous kept line ends in a backslash continuation;
            # keep an empty line so the continuation is not glued to the
            # following statement.
            text+='\n'
    # Pass 2: remove complete, non-nested block comments (start..end).
    index_start=0
    text_out=""
    while True:
        index=text.find(blok_comm[0],index_start)
        index_end=text.find(blok_comm[1],index)
        if(index>=0 and index_end>index):
            text_out+= text[index_start:index]
            index_start=index_end+len(blok_comm[1])
        else:
            # No further complete block comment: keep the rest verbatim
            # (an unterminated block start is left in the output).
            text_out+=text[index_start:]
            break
    with open(file,mode="w+",encoding="utf-8") as f:
        f.write(text_out)
去除过多的空白字符
def simplified(text:str):
    '''
    Return a new string with excess whitespace removed.

    Leading and trailing whitespace (tab, newline, vertical tab, form
    feed, carriage return, space) is dropped, and every interior run of
    those characters collapses to a single space.

    Fix: the original raised IndexError on an empty or all-whitespace
    input (its leading-skip loop ran past the end); those now return "".
    '''
    space=('\t', '\n', '\v', '\f', '\r', ' ')
    start=0
    # Skip leading whitespace -- bounded so "" / "   " cannot overrun.
    while start<len(text) and text[start] in space:
        start+=1
    parts=[]
    pending_gap=False
    for ch in text[start:]:
        if ch in space:
            pending_gap=True        # remember the gap, emit it lazily
        else:
            if pending_gap:
                parts.append(' ')   # collapse the whole run to one space
                pending_gap=False
            parts.append(ch)
    # A trailing gap is deliberately never emitted, like the original.
    # join() avoids the quadratic cost of repeated string +=.
    return ''.join(parts)
分割合并文件
在一些特定场合,可能对文件大小有限制,这时可以将大文件分割为小文件,在传输结束后再合并
import os
import sys
import re
def split(file:str,max_size:int):
    """Split *file* into chunks of at most *max_size* bytes.

    Chunks are written next to the original as "<file>.split.0",
    "<file>.split.1", ... and the original file is removed afterwards.
    Does nothing when the file does not exist or max_size is not
    positive.
    """
    if not os.path.exists(file):
        return
    if max_size<=0:
        # Guard: f.read(0) never advances, so the original looped
        # forever here.
        return
    with open(file,mode='rb') as f:
        index=0
        # read() advances the file position itself; the original's
        # getsize/seek/offset bookkeeping was redundant.
        while chunk:=f.read(max_size):
            with open(file+'.split.'+str(index),mode='wb+') as g:
                g.write(chunk)
            index+=1
    os.remove(file)
# Count the chunk files that belong to one split archive.
def scan_list(file:str):
    """Given one chunk path like "dir/name.split.3", locate its siblings.

    Returns (original_name, chunk_count) where original_name is *file*
    with the ".split.N" suffix removed.  When *file* does not look like
    a chunk path, the count is 0.
    """
    file_split=os.path.splitext(file)
    path_split=os.path.split(file_split[0])
    file_list=[]
    if(file_split[-1][1:].isdigit()) and file_split[0].endswith('.split'):
        # Base chunk name without the numeric suffix, e.g. "name.split."
        prefix=path_split[-1]+'.'
        # 'or .' handles a bare filename with no directory part, where
        # os.listdir('') would raise.
        slist=os.listdir(path_split[0] or '.')
        for item in slist:
            # Count only real chunks: "<name>.split.<digits>".  The old
            # substring find() also matched unrelated files that merely
            # contained the name (e.g. "name.split.0.bak"), which made
            # the count wrong and broke combin().
            if item.startswith(prefix) and item[len(prefix):].isdigit():
                file_list.append(os.path.join(path_split[0],item))
    return file_split[0][:-6],len(file_list)
def combin(file:str):
    """Reassemble a file from its ".split.N" chunks.

    *file* is any one chunk path; the restored file is written under the
    original name and each chunk is deleted once it has been copied.
    """
    name,num=scan_list(file)
    with open(name,mode='wb+') as out:
        for index in range(num):
            chunk_name=name+'.split.'+str(index)
            with open(chunk_name,mode='rb') as g:
                # Copy in bounded pieces instead of g.read()-ing the
                # whole chunk, so memory use stays flat however large
                # the chunks are.
                while piece:=g.read(1024*1024):
                    out.write(piece)
            os.remove(chunk_name)
# CLI entry: a numeric final extension means "reassemble the chunks",
# anything else means "split the file".
if __name__ == "__main__":
    argc=len(sys.argv)
    if argc<2:
        print('use: split [file] [split_size(Mb)] to split; or\nsplit [file.split.*] to combin')
    else:
        target=sys.argv[1]
        last_part=target.split('.')[-1]
        if last_part.isdigit():
            combin(target)
        elif argc>=3:
            mb=int(sys.argv[2])
            split(target,mb*1024*1024)
        else:
            print('use: split [file] [split_size(Mb)] to split')
监控剪切板
此为通过文件拷贝的形式实现跨电脑间的粘贴复制功能,也可以改为通过socket或其他形式实现。
import pyperclip
import time
import threading
import os
import zlib
import sys
import json
class monitor():
    """Sync the clipboard with a compressed exchange file.

    After start(), two polling threads run: one pushes local clipboard
    changes into save_path/<file_name>, the other pulls text appearing
    in load_path/<file_name> back into the local clipboard.  Setting
    monitor_state to False stops both threads.
    """
    def __init__(self) -> None:
        self.monitor_state=False   # True while observer threads should run
        self.save_path='./'
        self.load_path='./'
        self.file_name='clipboard'
    def save_full_path(self):
        """Path of the outgoing exchange file."""
        return os.path.join(self.save_path,self.file_name)
    def load_full_path(self):
        """Path of the incoming exchange file."""
        return os.path.join(self.load_path,self.file_name)
    def clipboard_get(self):
        """Return the current clipboard text."""
        return pyperclip.paste()
    def clipboard_set(self,data:str):
        """Replace the clipboard text with *data*."""
        pyperclip.copy(data)
    def save_to_file(self,data:str):
        """Write *data* zlib-compressed to the outgoing file."""
        target=self.save_full_path()
        if(os.path.exists(target)):
            os.remove(target)
        with open(target,mode='wb+') as f:
            f.write(zlib.compress(data.encode('utf-8')))
    def load_from_file(self):
        """Read, decompress and delete the incoming file.

        Returns the decoded text, or None when no file is present.
        """
        source=self.load_full_path()
        if not os.path.exists(source):
            return None
        with open(source,mode='rb') as f:
            raw=zlib.decompress(f.read())
        os.remove(source)
        return raw.decode('utf-8')
    def start(self,save_path:str,load_path:str):
        """Store the exchange paths and launch both observer threads."""
        self.save_path=save_path
        self.load_path=load_path
        self.monitor_state=True
        threading.Thread(target=self.file_observer,args=()).start()
        threading.Thread(target=self.clip_observer,args=()).start()
    def file_observer(self):
        """Poll the incoming file; mirror new text to the clipboard."""
        last_seen=''
        while self.monitor_state:
            incoming=self.load_from_file()
            if incoming is not None and len(incoming)>0 and incoming!=last_seen:
                last_seen=incoming
                self.clipboard_set(last_seen)
            time.sleep(0.3)
        print('file observer stop.')
    def clip_observer(self):
        """Poll the clipboard; mirror new text to the outgoing file."""
        last_seen=self.clipboard_get()
        while self.monitor_state:
            current=self.clipboard_get()
            if len(current)>0 and current!=last_seen:
                last_seen=current
                self.save_to_file(last_seen)
            time.sleep(0.2)
        print('clip observer stop.')
def load_cfg():
    """Read the save/load paths from ./clip_cfg.json.

    Returns (save_path, load_path); falls back to ('./', './') when the
    config file does not exist.
    """
    cfg_file='./clip_cfg.json'
    if not os.path.exists(cfg_file):
        return './','./'
    with open(cfg_file,mode='r',encoding='utf-8') as f:
        cfg=json.loads(f.read())
    return cfg['save_path'],cfg['load_path']
# CLI entry point: take the two paths from argv, or fall back to the
# JSON config file, then run the monitor until Ctrl-C.
if __name__ == '__main__':
    args=sys.argv
    if len(args)>=3:
        save_path,load_path=args[1],args[2]
    else:
        print('use: clip [save_path] [load_path]')
        save_path,load_path=load_cfg()
    mon=monitor()
    mon.start(save_path,load_path)
    while True:
        try:
            time.sleep(1)
        except KeyboardInterrupt as stop:
            # Ctrl-C: ask the observer threads to finish and exit.
            print(str(stop))
            mon.monitor_state=False
            break
镜像源的文件批量下载
把镜像的网页文件保存下来,使用此脚本即可下载所有文件
import requests
from bs4 import BeautifulSoup
def download_file(url, output_path):
    """Stream *url* to *output_path* in 8 KiB chunks.

    Raises requests.HTTPError for a non-2xx response before anything is
    written.
    """
    with requests.get(url, stream=True) as r:
        r.raise_for_status()
        with open(output_path, 'wb') as f:
            for chunk in r.iter_content(chunk_size=8192):
                if chunk:  # skip keep-alive chunks
                    # No per-chunk flush: flushing every 8 KiB defeats
                    # the buffered file object; close() flushes once.
                    f.write(chunk)
# Parse a saved mirror index page into (base url, file names).
def get_file_list(page:str):
    """Extract the download base URL and the .deb file names from the
    saved mirror HTML page *page*.

    Returns (base_url, [file_name, ...]).
    """
    with open(page, mode='r',encoding='utf-8') as handle:
        soup = BeautifulSoup(handle.read(), 'html.parser')
    def build_base_url():
        # The <h1> heading carries the mirror path; keep it from the
        # first '/' onward and prepend the mirror host.
        heading = soup.find('h1').text
        slash = heading.find('/')
        return 'https://mirrors.tuna.tsinghua.edu.cn'+heading[slash:]
    def collect_names():
        # Every table cell whose text ends in 'deb' is a package file.
        return [cell.text for cell in soup.find_all('td')
                if cell.text.endswith('deb')]
    return build_base_url(),collect_names()
# Script entry: download every .deb listed in the saved index page.
if __name__ == "__main__":
    url,flist=get_file_list('gdb_list.html')
    for item in flist:
        full_url=url+'/'+item
        print(f"start download {full_url}")
        download_file(full_url,'./files/gdb/'+item)