[Python] A few handy utility scripts

Find files with a given extension

Find files whose names end with a given suffix and return the list of matching paths; sub-directories are searched recursively.

import os



# Find files with the given suffix
def find_type(path:str,fix:str):
    dlist=os.listdir(path)
    file_list=[]
    for i in dlist:
        ps=os.path.join(path, i)
        if os.path.isdir(ps):
            # recurse into sub-directories
            file_list+=find_type(ps,fix)
        elif ps.endswith(fix):
            file_list.append(ps)
    return file_list
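
A minimal usage sketch (the directory and suffix below are made up):

# Hypothetical usage: collect every .py file under the current directory
py_files = find_type(".", ".py")
for p in py_files:
    print(p)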

Convert file encoding

The example converts a gb2312-encoded file to utf-8.

def conv(file:str):
    s=""
    try:
        # read the original content as gb2312
        with open(file,encoding="gb2312") as f:
            s=f.read()
        os.remove(file)
        # write the same file back as utf-8
        with open(file,mode="w+",encoding="utf-8") as f:
            f.write(s)
    except Exception as e:
        print("conv failed",file,e)

Strip comments from a file

Given a file name, a line-comment marker and a pair of block-comment markers, the function removes all comments and writes the result back over the original file. (Comment markers that appear inside string literals are not recognized.)
For example, C uses // and /* */ for comments, so the call looks like this:

del_comm("main.c","//",["/*","*/"])
# Remove all comments
def del_comm(file:str,line_comm:str,blok_comm:list[str]):
    text=""
    try:
        with open(file,encoding="utf-8") as f:
            lines=f.readlines()
    except Exception as e:
        print("decode failed",file)
        return
    # first pass: strip line comments and drop lines that become empty
    for i in range(len(lines)):
        index=lines[i].find(line_comm)
        if(index>=0):
            lstr=lines[i][:index]
        else:
            lstr=lines[i].rstrip()
        if(len(lstr.strip())>0):
            text+=lstr+'\n'
        elif(text[-2:]=='\\\n'):
            # keep a blank line after a trailing backslash so the
            # line continuation does not swallow the next line
            text+='\n'
    # second pass: remove block comments
    index_start=0
    text_out=""
    while True:
        index=text.find(blok_comm[0],index_start)
        # look for the closer only after the opener so overlapping
        # sequences such as "/*/" are not matched
        index_end=text.find(blok_comm[1],index+len(blok_comm[0])) if index>=0 else -1
        if(index>=0 and index_end>index):
            text_out+=text[index_start:index]
            index_start=index_end+len(blok_comm[1])
        else:
            text_out+=text[index_start:]
            break
    with open(file,mode="w+",encoding="utf-8") as f:
        f.write(text_out)
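
Combined with find_type, a whole source tree can be processed in one pass (the project path below is hypothetical):

# Hypothetical usage: strip comments from every .c file under ./project
for src in find_type("./project", ".c"):
    del_comm(src, "//", ["/*", "*/"])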
        

Collapse excess whitespace

def simplified(text:str):
  '''
  Return a new string with leading whitespace removed and runs of
  whitespace collapsed into a single space.
  '''
  space=['\t', '\n', '\v', '\f', '\r',  ' ']

  r=""
  start=0
  is_empty=False
  # skip leading whitespace (guard against empty or all-whitespace input)
  while start<len(text) and text[start] in space:
    start+=1
  for i in range(start,len(text)):
    if text[i] in space:
      is_empty=True
    else:
      if(is_empty==True):
        r+=" "
        is_empty=False
      r+=text[i]
  return r
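
A quick check of the behaviour (the sample string is made up):

# Hypothetical usage
print(simplified("  hello\t\n  world  "))   # -> "hello world"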

Split and merge files

Some situations impose a limit on file size. A large file can be split into smaller chunks and merged back together once the transfer is done.


import os
import sys

# Split `file` into chunks of at most max_size bytes, named
# file.split.0, file.split.1, ... The original file is removed after splitting.
def split(file:str,max_size:int):
  if not os.path.exists(file):
    return
  size=os.path.getsize(file)
  with open(file,mode='rb') as f:
    off=0
    index=0
    while off<size:
      f.seek(off)
      rb=f.read(max_size)
      off+=len(rb)
      with open(file+'.split.'+str(index),mode='wb+') as g:
        g.write(rb)
      index+=1
  os.remove(file)

# Scan the directory for the chunk files; return the original file name and the chunk count
def scan_list(file:str):
  file_split=os.path.splitext(file)
  path_split=os.path.split(file_split[0])
  file_list=[]
  # print(f"file_split: {file_split}")
  if(file_split[-1][1:].isdigit()) and file_split[0].endswith('.split'):
    # get the path and file name without the numeric suffix
    # print(f"path_split: {path_split}")
    # fall back to the current directory when no directory part is given
    slist=os.listdir(path_split[0] or '.')
    for item in slist:
      # print(f' scan_list {item}')
      if(item.find(path_split[-1])!=-1):
        file_list.append(os.path.join(path_split[0],item))
  return file_split[0][:-6],len(file_list)

# Merge the file.split.* chunks back into the original file, then delete the chunks
def combin(file:str):
  name,num=scan_list(file)
  index=0
  with open(name,mode='wb+') as f:
    while index<num:
      com_name=name+'.split.'+str(index)
      with open(com_name,mode='rb') as g:
        d=g.read()
        f.write(d)
      os.remove(com_name)
      index+=1

if __name__ == "__main__":
  if(len(sys.argv)>=2):
    file=sys.argv[1]
    sp=file.split('.')
    if(sp[-1].isdigit()):
      combin(file)
    else:
      if(len(sys.argv)>=3):
        split_size=int(sys.argv[2])
        split(file,split_size*1024*1024)
      else:
        print('usage: split [file] [split_size(MB)] to split')
  else:
    print('usage: split [file] [split_size(MB)] to split; or\nsplit [file.split.*] to combine')
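
Assuming the script is saved as split.py (the file name is arbitrary), a round trip might look like this:

python split.py ./big.iso 10
python split.py ./big.iso.split.0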


Monitor the clipboard

This implements copy-and-paste across machines by shuttling the clipboard content through a shared file; the same idea could also be implemented over a socket or another transport.

import pyperclip
import time
import threading
import os
import zlib
import sys
import json

class monitor():
    def __init__(self) -> None:
        self.monitor_state=False
        self.save_path='./'
        self.load_path='./'
        self.file_name='clipboard'
    def save_full_path(self):
        return os.path.join(self.save_path,self.file_name)
    def load_full_path(self):
        return os.path.join(self.load_path,self.file_name)
    def clipboard_get(self):
        data = pyperclip.paste()
        return data
    def clipboard_set(self,data:str):
        pyperclip.copy(data)
    def save_to_file(self,data:str):
        if(os.path.exists(self.save_full_path())):
            os.remove(self.save_full_path())
        with open(self.save_full_path(),mode='wb+') as f:
            f.write(zlib.compress(data.encode('utf-8')))
    def load_from_file(self):
        if(os.path.exists(self.load_full_path())):
            with open(self.load_full_path(),mode='rb') as f:
                d=zlib.decompress(f.read())
            os.remove(self.load_full_path())
            return d.decode('utf-8')
        return None
    def start(self,save_path:str,load_path:str):
        self.save_path=save_path
        self.load_path=load_path
        self.monitor_state=True
        th=threading.Thread(target=self.file_observer,args=())
        th.start()
        th2=threading.Thread(target=self.clip_observer,args=())
        th2.start()
    # Watch the shared file; when new content appears, put it on the local clipboard
    def file_observer(self):
        recent_txt = ''
        while self.monitor_state:
            txt = self.load_from_file()
            if txt is not None and len(txt)>0 and txt != recent_txt:
                recent_txt = txt
                self.clipboard_set(recent_txt)
            time.sleep(0.3)
        print('file observer stop.')

    # Watch the local clipboard; when it changes, write the content to the shared file
    def clip_observer(self):
        recent_txt = self.clipboard_get()
        while self.monitor_state:
            txt = self.clipboard_get()
            if  len(txt)>0 and txt != recent_txt:
                recent_txt = txt
                self.save_to_file(recent_txt)
            time.sleep(0.2)
        print('clip observer stop.')

def load_cfg():
    file='./clip_cfg.json'
    if os.path.exists(file):
        with open(file,mode='r',encoding='utf-8') as f:
            d=f.read()
            cfg=json.loads(d)
            return cfg['save_path'],cfg['load_path']
    return './','./'

if __name__ == '__main__':
    if(len(sys.argv)>=3):
        save_path=sys.argv[1]
        load_path=sys.argv[2]
    else:
        print('usage: clip [save_path] [load_path]')
        save_path,load_path=load_cfg()
    mon=monitor()
    mon.start(save_path,load_path)
    while True:
        try:
            time.sleep(1)
        except KeyboardInterrupt as e:
            print(str(e))
            mon.monitor_state=False
            break
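
load_cfg reads an optional clip_cfg.json next to the script. A minimal example, with a made-up shared folder mounted on both machines:

{
    "save_path": "//nas/share/pc_a_to_b",
    "load_path": "//nas/share/pc_b_to_a"
}

On the second machine the two paths are swapped, so what one side saves is what the other side loads.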


Batch-download files from a mirror

Save the mirror's directory-listing page as an HTML file; this script then downloads every file listed on it.

import os
import requests
from bs4 import BeautifulSoup


def download_file(url, output_path):
  with requests.get(url, stream=True) as r:
    r.raise_for_status()
    with open(output_path, 'wb') as f:
      for chunk in r.iter_content(chunk_size=8192):
        if chunk:
          f.write(chunk)
          f.flush()

# Parse the saved HTML page: return the base download URL and the list of file names
def get_file_list(page:str):
  with open(page, mode='r',encoding='utf-8') as file:
    html_content = file.read()
    soup = BeautifulSoup(html_content, 'html.parser')
    def get_path():
      h1_tag = soup.find('h1').text
      index=h1_tag.find('/')
      path='https://mirrors.tuna.tsinghua.edu.cn'+h1_tag[index:]
      # print(path)
      return path
    def get_list():
      li_tags = soup.find_all('td')
      flist=[]
      for li_tag in li_tags:
        if li_tag.text.endswith('deb'):
          flist.append(li_tag.text)
          # print(li_tag.text)
      return flist
    return get_path(),get_list()


if __name__ == "__main__":
  url,flist=get_file_list('gdb_list.html')
  # make sure the output directory exists before downloading
  os.makedirs('./files/gdb', exist_ok=True)
  for item in flist:
    path=url+'/'+item
    print(f"start download {path}")
    download_file(path,'./files/gdb/'+item)
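
download_file is generic and can also be used on its own; a minimal sketch with a made-up URL:

# Hypothetical standalone usage
download_file("https://mirrors.tuna.tsinghua.edu.cn/path/to/some.deb", "./some.deb")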



