Python文件操作

数据攻城小狮子

于 2023-03-03 16:02:37 发布

阅读量222

点赞数

分类专栏：Python学习　文章标签：python 开发语言

本文为博主原创文章，未经博主允许不得转载。

本文链接：https://blog.csdn.net/weixin_46322367/article/details/129317530

版权

Python学习专栏收录该内容

41 篇文章 10 订阅

订阅专栏

该文章展示了如何使用Python进行不同类型的文件操作，包括合并txt文件，读写JSON数据，处理CSV文件，以及在Word、Excel和PowerPoint文件中搜索特定文本。通过示例代码，读者可以学习到文件的读取、写入及内容遍历等基本操作。

摘要由CSDN通过智能技术生成

txt文件操作

def mergeTxt(txtFiles, resultFile='result.txt'):
    """Interleave the lines of two UTF-8 text files into one output file.

    Lines are taken alternately from ``txtFiles[0]`` and ``txtFiles[1]``,
    starting with the first file.  As soon as either file runs out, the
    remaining lines of the other file are appended in order.  Every line
    written ends with exactly one newline.

    Args:
        txtFiles: Sequence whose first two items are the input file paths.
        resultFile: Path of the merged file; it is created or overwritten.
    """
    def _terminated(line):
        # Lines read from a file keep their own '\n'; only a final line may
        # lack one.  Add it in that case only, so no blank lines are
        # inserted and no two lines end up glued together.
        return line if line.endswith('\n') else line + '\n'

    with open(resultFile, 'w', encoding='utf8') as out, \
            open(txtFiles[0], encoding='utf8') as first, \
            open(txtFiles[1], encoding='utf8') as second:
        while True:
            # Alternate: one line from the first file, then one from the
            # second.  An empty string from readline() means end of file.
            line = first.readline()
            if not line:
                remaining = second
                break
            out.write(_terminated(line))
            line = second.readline()
            if not line:
                remaining = first
                break
            out.write(_terminated(line))
        # Copy whatever is left in the file that has not ended yet.
        for line in remaining:
            out.write(_terminated(line))
# Demo: show both input files, merge them, then show the merged result.
txtFiles = ['文本1.txt', '文本2.txt']


def _showFile(title, path):
    """Print *title* followed by every line of the UTF-8 text file *path*."""
    print(title)
    with open(path, encoding='utf8') as fp:
        for line in fp:
            # Each line still carries its own newline; strip it so print()
            # does not emit an extra empty line after every row.
            print(line.rstrip('\n'))


_showFile("文本1:", txtFiles[0])
_showFile("文本2:", txtFiles[1])
mergeTxt(txtFiles)
_showFile('合并后:', 'result.txt')

在这里插入图片描述

JSON文件操作

import json
# Sample records: community name, average price and monthly transaction
# count for four residential communities.
information = [
    {'小区名称': '小区A', '均价': 8000, '月交易量': 20},
    {'小区名称': '小区B', '均价': 8500, '月交易量': 35},
    {'小区名称': '小区C', '均价': 7800, '月交易量': 50},
    {'小区名称': '小区D', '均价': 12000, '月交易量': 18},
]
# Write the records to a JSON file.
with open('房屋信息.json', 'w', encoding='utf8') as fp:
    # ensure_ascii=False stores the Chinese text as real UTF-8 characters
    # instead of \uXXXX escapes, so the file stays human-readable.
    json.dump(information, fp, ensure_ascii=False, indent=4,
              separators=(',', ':'))
# Read the file back and print one record per line.
with open('房屋信息.json', encoding='utf8') as fp:
    information = json.load(fp)
for info in information:
    print(info)

在这里插入图片描述

CSV文件操作

from csv import reader,writer
from random import randrange
from datetime import date,timedelta

fn = 'data.csv'
# newline='' leaves line-ending handling to the csv module; without it the
# writer's '\r\n' terminator is translated again on Windows and every data
# row is followed by an empty row.
with open(fn, 'w', encoding='utf8', newline='') as fp:
    # Create the csv writer object.
    wr = writer(fp)
    # Header row.
    wr.writerow(['日期', '销量'])
    # First day of the series: 2020-01-01.
    startDate = date(2020, 1, 1)
    # Generate 100 rows of simulated data, one per consecutive day.
    for i in range(100):
        # Upward trend (5 per day) plus random noise in [5, 50).
        amount = 500 + i * 5 + randrange(5, 50)
        wr.writerow([str(startDate), amount])
        # Advance to the next day.
        startDate = startDate + timedelta(days=1)
# Read the generated csv file back and print its contents.
with open(fn, encoding='utf8', newline='') as fp:
    for line in reader(fp):
        # Skip empty rows, e.g. in a file written earlier without newline=''.
        if line:
            print(*line)

在这里插入图片描述

Word、Excel、PowerPoint文件操作

检查并输出当前文件夹中包含指定字符串的Word、Excel、PowerPoint文件名称；命令行加上 /s 参数时，同时搜索所有子文件夹

from sys import argv
from os import listdir
from os.path import join,isfile,isdir
from docx import Document
from openpyxl import load_workbook
from pptx import Presentation

def checkdocx(dstStr, fn):
    """Return True if the .docx file *fn* contains the text *dstStr*.

    The body paragraphs are searched first, then the text of every cell of
    every table in the document.
    """
    doc = Document(fn)
    # Body paragraphs.
    if any(dstStr in para.text for para in doc.paragraphs):
        return True
    # Table cells, table by table and row by row.
    return any(
        dstStr in cell.text
        for table in doc.tables
        for row in table.rows
        for cell in row.cells
    )


def checkxlsx(dstStr, fn):
    """Return True if any text cell of the .xlsx file *fn* contains *dstStr*.

    Every cell of every worksheet is visited; cells whose value is not a
    string are skipped.
    """
    wb = load_workbook(fn)
    for ws in wb.worksheets:
        for row in ws.rows:
            for cell in row:
                value = cell.value
                # Cell values are not always text (e.g. empty or numeric
                # cells), and `in` only works on strings here.  Testing the
                # type explicitly replaces a bare `except` that would also
                # have hidden unrelated errors.
                if isinstance(value, str) and dstStr in value:
                    return True
    return False


def checkpptx(dstStr, fn):
    """Return True if any slide of the .pptx file *fn* contains *dstStr*.

    For every shape on every slide, the text of table cells is searched if
    the shape holds a table; otherwise the shape's own text frame is
    searched if it has one.
    """
    presentation = Presentation(fn)
    for slide in presentation.slides:
        for shape in slide.shapes:
            # Ask the shape what it contains instead of comparing
            # shape_type with magic numbers: 19 is TABLE, but 14 is
            # PLACEHOLDER, so ordinary text boxes (17) and auto shapes (1)
            # were never searched.  getattr() keeps this safe for shape
            # classes that do not define the property.
            if getattr(shape, 'has_table', False):
                for row in shape.table.rows:
                    for cell in row.cells:
                        if dstStr in cell.text_frame.text:
                            return True
            elif getattr(shape, 'has_text_frame', False):
                if dstStr in shape.text_frame.text:
                    return True
    return False


def main(dstStr, flag):
    """Print the path of every Office document that contains *dstStr*.

    Starting from the current folder ('.'), each .docx, .xlsx and .pptx file
    is checked with the matching checker and its path is printed when the
    text is found.

    Args:
        dstStr: Text to search for.
        flag: When true, subfolders are searched as well (breadth-first);
            otherwise only the current folder is examined.
    """
    from collections import deque

    # Extension -> checker; extensions are compared in lower case so that
    # names such as REPORT.DOCX are not missed.
    checkers = (
        ('.docx', checkdocx),
        ('.xlsx', checkxlsx),
        ('.pptx', checkpptx),
    )
    # Queue of folders still to visit; deque gives O(1) removal at the front.
    pending = deque(['.'])
    while pending:
        currentDir = pending.popleft()
        for fn in listdir(currentDir):
            path = join(currentDir, fn)
            if isfile(path):
                # "~$name.docx" files are lock files that Office creates
                # while a document is open; they are not real documents
                # and cannot be parsed.
                if fn.startswith('~$'):
                    continue
                lowered = fn.lower()
                for ext, check in checkers:
                    if lowered.endswith(ext):
                        if check(dstStr, path):
                            print(path)
                        break
            # Queue subfolders only when a recursive search was requested.
            elif flag and isdir(path):
                pending.append(path)


# Command line: [/s] <search string>
#   /s  also search all subfolders of the current folder.
args = argv[1:]
flag = bool(args) and args[0] == '/s'
if flag:
    args = args[1:]
if not args:
    # Exit with a usage hint instead of an IndexError when the search
    # string is missing.
    raise SystemExit('Usage: %s [/s] <search string>' % argv[0])
dstStr = args[0]
# Search for the string given on the command line; it used to be parsed
# and then ignored in favour of a hard-coded value.
main(dstStr, flag)