文件操作及常用模块使用

文件处理模型

输入,处理,输出。

输入:读取4个队员的训练数据,读取4个文件

james.txt 2-34,3:21,2,34,2.45,3.01,2:01,2:01,3:10,2-22

sarah.txt 2:58,2.58,2:39,2-25,2:55,2:54,2.18,2:55,2:55

julie.txt 2.59,2.11,2:11,2:23,3-10,2-23,3:10,3.21,3-21

mikey.txt 2:22,3.01,3:01,3.02,3:02,3.02,3:22,2.49,2:38

处理:标准化数据,切分数据,top3(最快的3个时间)

输出:将每个人的信息打印在屏幕上显示

kelly教练,每次训练结束后,还要同步更新4个文件太麻烦了,把所有记录写在一个文件中吧,这个对于你来说应该不难吧?

In [ ]

f = open('work/train_data_cor.txt')
line = f.readline()
print(line)
line = f.readline()
print(line)
f.close()
james,2004-5-21,2.34,3:21,2.34,2.45,3.01,2:01,2:01,3:10,2-22

julie,2006-5-9,2.59,2.11,2:11,2:23,3-10,2-23,3:10,3.21,3-21

open() 为bif(内置函数),参数有多个,必须的是文件路径。 返回的一个文件对象。

file.readline(),读取文件中的一行。

In [ ]

import sys
#读取整个文件内容
f = open('work/train_data_cor.txt')
line = f.readline()
while(line != ''):
    print(line)
    line = f.readline()
f.close()
james,2004-5-21,2.34,3:21,2.34,2.45,3.01,2:01,2:01,3:10,2-22

julie,2006-5-9,2.59,2.11,2:11,2:23,3-10,2-23,3:10,3.21,3-21

sarah,2004-3-8,2:58,2.58,2:39,2-25,2-55,2:54,2.18,2:55,2:55

mikey,2003-9-10,2:22,3.01,3:01,3.02,3:02,3.02,3:22,2.49,2:38

In [ ]

#更好的方式
f = open('work/train_data_cor.txt')
for line in f:
    print(line)
f.close()
james,2004-5-21,2.34,3:21,2.34,2.45,3.01,2:01,2:01,3:10,2-22

julie,2006-5-9,2.59,2.11,2:11,2:23,3-10,2-23,3:10,3.21,3-21

sarah,2004-3-8,2:58,2.58,2:39,2-25,2-55,2:54,2.18,2:55,2:55

mikey,2003-9-10,2:22,3.01,3:01,3.02,3:02,3.02,3:22,2.49,2:38

如果数据有问题呢?

In [ ]

f = open('work/train_data_wrg.txt')
for line in f:
    data = line.strip().split(',')

    print('姓名:'+data.pop(0)+'生日:'+data.pop(0)+'时间:'+str(data))
f.close()
姓名:james生日:2004-5-21时间:['2.34', '3:21', '2.34', '2.45', '3.01', '2:01', '2:01', '3:10', '2-22']
姓名:julie生日:2006-5-9时间:['2.59', '2.11', '2:11', '2:23', '3-10', '2-23', '3:10', '3.21', '3-21']

---------------------------------------------------------------------------IndexError Traceback (most recent call last)<ipython-input-14-a51a9349f79f> in <module> 3 data = line.strip().split(',') 4 ----> 5 print('姓名:'+data.pop(0)+'生日:'+data.pop(0)+'时间:'+str(data)) 6 f.close() IndexError: pop from empty list

有两种解决办法:

1.使用异常跳过有问题的数据 2.增加代码判断

In [ ]

#使用异常
f = open('work/train_data_wrg.txt')
for line in f:
  
    data = line.strip().split(',')
    try:

        print('姓名:'+data.pop(0)+'生日:'+data.pop(0)+'时间:'+str(data))
    except:
        pass
f.close()
姓名:james生日:2004-5-21时间:['2.34', '3:21', '2.34', '2.45', '3.01', '2:01', '2:01', '3:10', '2-22']
姓名:julie生日:2006-5-9时间:['2.59', '2.11', '2:11', '2:23', '3-10', '2-23', '3:10', '3.21', '3-21']
姓名:sarah生日:2004-3-8时间:['2:58', '2.58', '2:39', '2-25', '2-55', '2:54', '2.18', '2:55', '2:55']
姓名:mikey生日:2003-9-10时间:['2:22', '3.01', '3:01', '3.02', '3:02', '3.02', '3:22', '2.49', '2:38']

In [ ]

#代码判断
f = open('work/train_data_wrg.txt')#1
for line in f:#2
  
    data = line.strip().split(',')

    if len(data) != 1:
        print('姓名:'+data.pop(0)+'生日:'+data.pop(0)+'时间:'+str(data))

f.close()#3
姓名:james生日:2004-5-21时间:['2.34', '3:21', '2.34', '2.45', '3.01', '2:01', '2:01', '3:10', '2-22']
姓名:julie生日:2006-5-9时间:['2.59', '2.11', '2:11', '2:23', '3-10', '2-23', '3:10', '3.21', '3-21']
姓名:sarah生日:2004-3-8时间:['2:58', '2.58', '2:39', '2-25', '2-55', '2:54', '2.18', '2:55', '2:55']
姓名:mikey生日:2003-9-10时间:['2:22', '3.01', '3:01', '3.02', '3:02', '3.02', '3:22', '2.49', '2:38']

In [ ]

#clean的写法,三行变一行
with open('work/train_data_cor.txt') as f:
    for line in f:
        data = line.strip().split(',')
        print('姓名:'+data.pop(0)+'生日:'+data.pop(0)+'时间:'+str(data))
姓名:james生日:2004-5-21时间:['2.34', '3:21', '2.34', '2.45', '3.01', '2:01', '2:01', '3:10', '2-22']
姓名:julie生日:2006-5-9时间:['2.59', '2.11', '2:11', '2:23', '3-10', '2-23', '3:10', '3.21', '3-21']
姓名:sarah生日:2004-3-8时间:['2:58', '2.58', '2:39', '2-25', '2-55', '2:54', '2.18', '2:55', '2:55']
姓名:mikey生日:2003-9-10时间:['2:22', '3.01', '3:01', '3.02', '3:02', '3.02', '3:22', '2.49', '2:38']

如果文件不存在呢?

f = open('work/train_data1.txt') for line in f:

data = line.strip().split(',')

if len(data) != 1:
    print('姓名:'+data.pop(0)+'生日:'+data.pop(0)+'时间:'+data)

f.close()

file对象的函数列表

In [ ]

with open('work/train_data.txt') as f:
    data = f.read()
    print('整个文件\n'+data)
    f.seek(0)
    data = f.read(10)
    print('读取指定大小的文件内容\n'+data)
    print(f.tell())
整个文件
james,2004-5-21,2.34,3:21,2.34,2.45,3.01,2:01,2:01,3:10,2-22
julie,2006-5-9,2.59,2.11,2:11,2:23,3-10,2-23,3:10,3.21,3-21
kenny
sarah,2004-3-8,2:58,2.58,2:39,2-25,2-55,2:54,2.18,2:55,2:55
mikey,2003-9-10,2:22,3.01,3:01,3.02,3:02,3.02,3:22,2.49,2:38

读取指定大小的文件内容
james,2004
10

如何写入文件内容呢?

In [ ]

f = open('work/data.txt','w')
f.write('this is file content')
f.close()
Help on built-in function close:

close() method of _io.TextIOWrapper instance
    Flush and close the IO object.
    
    This method has no effect if the file is already closed.

open('work/data.txt','w')第一个参数文件路径,第二个参数打开文件的模式
f.write('this is file content')参数为写入的内容 f.close()关闭文件

对象转JSON

In [ ]

import json
class Athlete(json.JSONEncoder):
    def __init__(self,a_name,a_dob=None,a_times=[]):
        self.name = a_name
        self.dob = a_dob
        self.times = a_times
    def top3(self):
        return sorted(set([self.sanitize(t) for t in self.times]))[0:3]
    def sanitize(self,time_string):
        if '-' in time_string:
            splitter = '-'
        elif ':' in time_string:
            splitter = ':'
        else:
            return (time_string)
        (mins,secs) = time_string.split(splitter)
        return (mins+'.'+secs)


with open('work/train_data_cor.txt') as f:
    data = f.readline().strip().split(',')
    ath = Athlete(data.pop(0),data.pop(0),data)
    print(ath)

ath_json = json.dumps(ath.__dict__)
<__main__.Athlete object at 0x7fdd6cc6a450>

内中的json形式的变量保存到文件

In [ ]


with open('work/json.txt','w') as f:
    json.dump(ath_json,f)

读取json文件内容

In [ ]

with open('work/json.txt') as f:

    ath = json.load(f)
    print(ath)
{"name": "james", "dob": "2004-5-21", "times": ["2.34", "3:21", "2.34", "2.45", "3.01", "2:01", "2:01", "3:10", "2-22"]}

目录访问

In [ ]

import os
#返回当前工作目录
current_path = os.getcwd()
print('当前路径:'+current_path)

In [ ]

#改变当前工作目录
os.chdir('/home/aistudio/work')
#运行mkdir命令
os.system('mkdir today')

In [ ]

from pathlib import Path
#返回当前绝对路径
abs_path = os.path.abspath('')
print('abs_path:'+abs_path)
#路径是否存在
Path(abs_path).exists()

In [ ]

print('当前路径:'+os.getcwd())
listdir = os.listdir()
#返回当前路径下文件和文件夹名
print(listdir)

In [ ]

#是否为文件夹
os.path.isdir('/home/aistudio/work/today')

问题:显示work路径下的所有类型为txt的文件

In [ ]

import os
listdir = os.listdir('/home/aistudio/work')

target = []
for name in listdir:
    #防止文件名与文件夹名一样的情况
    # print(os.path.isfile(name))
    
    temp = name.split('.')
    (filename,filetype) = (temp.pop(0),temp.pop(0))
    if filetype == 'txt':
        target.append(name)
 
    # print('name:%s,type:%s' %(filename,filetype))

print(target)

问题:temp = name.split('.')在name为dir1时出现问题

In [ ]

import os
path = '/home/aistudio/work'
listdir = os.listdir(path)

target = []
for name in listdir:
    #防止文件名与文件夹名一样的情况
    # print(os.path.isfile(name))
    if os.path.isfile(path+'/'+name):
        temp = name.split('.')
        (filename,filetype) = (temp.pop(0),temp.pop(0))
        if filetype == 'txt':
                target.append(name)
    
        # print('name:%s,type:%s' %(filename,filetype))


print(target)

如果dir1中包含文件呢?

In [ ]

import os

target = []


path = '/home/aistudio/work'
listdir = os.listdir(path)
for name in listdir:
    #防止文件名与文件夹名一样的情况
    if os.path.isfile(path+'/'+name):
        temp = name.split('.')
        (filename,filetype) = (temp.pop(0),temp.pop(0))
        if filetype == 'txt':
            target.append(name)  
    else:
        #如果是文件夹,需要读取该文件夹的列表        
        dir_path = path+'/'+name
        listdir = os.listdir(dir_path)
        for name in listdir:
            #防止文件名与文件夹名一样的情况
            if os.path.isfile(dir_path+'/'+name):
                temp = name.split('.')
                (filename,filetype) = (temp.pop(0),temp.pop(0))
                if filetype == 'txt':
                    target.append(name)
print('结果:'+str(target))

如果dir1中又包含文件夹该怎么办呢?

有重复的代码,我们就会想到循环

In [ ]

import os

def recur(path):
    listdir = os.listdir(path)
    for name in listdir:
        if name[0] is '.' or name[0] is '_':
            continue
        next_path = path+'/'+name
        if os.path.isfile(next_path) :
            # print(next_path + '=====isfile')
            temp = name.split('.')
            (filename,filetype) = (temp.pop(0),temp.pop(0))
            if filetype == 'txt':
                target.append(name)
        else:
            recur(next_path)

    return os.path.dirname(next_path)
path = '/home/aistudio/work'
target = []
recur(path)
print(target)

怎么一步一步运行程序,并观察程序中变量的值

In [ ]

import pdb
import os



def recur(path):

    listdir = os.listdir(path)

    for name in listdir:
        if name[0] is '.' or name[0] is '_':
            continue
        next_path = path+'/'+name
        if os.path.isfile(next_path) :
            # print(next_path + '=====isfile')
            temp = name.split('.')
            (filename,filetype) = (temp.pop(0),temp.pop(0))
            if filetype == 'txt':
                target.append(name)
        else:
            recur(next_path)

    return os.path.dirname(next_path)

if __name__ == '__main__':
    pdb.set_trace()
    path = '/home/aistudio/work'
    target = []
    recur(path)
    print(target)

In [ ]

#制造数据
with open('work/loren.txt','w+') as f:
    for i in range(5000000):
        f.write('loren,2011-11-3,270,3.59,4.11,3:11,3:23,4-10,3-23,4:10,4.21,4-21')
        f.write('\n')

In [ ]

#只使用进程的方式


print('压缩作业开始了,请您耐心等待...')

infile = 'work/loren.txt'
outfile = 'work/myarchive.zip'
f = zipfile.ZipFile(outfile, 'w', zipfile.ZIP_DEFLATED)
f.write(infile)
f.close()


print('压缩作业结束了,请问还需要帮您做什么呢?')

#使用进程+线程的方式

In [ ]

import threading, zipfile

class AsyncZip(threading.Thread):
    def __init__(self, infile, outfile):
        threading.Thread.__init__(self)
        self.infile = infile
        self.outfile = outfile

    def run(self):
        f = zipfile.ZipFile(self.outfile, 'w', zipfile.ZIP_DEFLATED)
        f.write(self.infile)
        f.close()
        print('压缩完成,您要的文件在:', self.outfile)

background = AsyncZip('work/loren.txt', 'work/myarchive.zip')

print('压缩作业开始了,请您耐心等待...')
background.start()
print('我正在为您压缩,请问还需要帮您做什么呢?')
background.join()

 

  • 1
    点赞
  • 1
    收藏
    觉得还不错? 一键收藏
  • 打赏
    打赏
  • 1
    评论

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论 1
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包

打赏作者

身在江湖的郭大侠

你的鼓励将是我创作的最大动力

¥1 ¥2 ¥4 ¥6 ¥10 ¥20
扫码支付:¥1
获取中
扫码支付

您的余额不足,请更换扫码支付或充值

打赏作者

实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值