文件读取
open打开文件默认mode='r'
打开方式:r , w , r+ , w+ , a , a+ , rb , wb , rb+
w:
1:文件不存在,则创建文件;
2:'w'模式打开文件,先清空文件的所有内容
3:read?(no) write?(yes)
r:
1:文件不存在,则报错;
2:'r'模式打开文件,不会清空文件的所有内容
3:read?(yes) write?(no)
a+:
1:文件不存在,创建文件;
2:write(yes) read(yes);
3:追加写入的内容到最后
文件读取的几种操作
readline:读取文件的一行
f.read(3):读取指定个数的字符(文本模式)或字节(二进制模式),这里是3个
seek方法,移动指针:
seek第一个参数是偏移量:>0,代表向右移动,<0,代表向左移动
seek第二个参数(whence)是偏移量的参照位置:0:从文件开头算起(默认);1:从指针当前位置算起;2:从文件末尾算起;
f.tell():查看指针当前位置;
with语句
当执行完with语句之后,自动清理或者关闭文件对象;
读取文件内容
import time
import functools
def timeit(fun):
    """Decorate ``fun`` so that every successful call prints its duration.

    The wrapped function receives the caller's positional and keyword
    arguments unchanged, and its return value is passed straight through.
    After the call returns, one line of the form
    ``<function name>运行时间为<seconds>s`` is printed.

    Args:
        fun: The callable to time.

    Returns:
        A wrapper around ``fun`` that keeps its ``__name__`` and ``__doc__``
        (copied by ``functools.wraps``).
    """
    @functools.wraps(fun)
    def wrapper(*args, **kwargs):
        # perf_counter() is monotonic and high-resolution, so the measured
        # interval cannot be skewed (or go negative) when the system clock
        # is adjusted, which is possible with time.time().
        start_time = time.perf_counter()
        res = fun(*args, **kwargs)
        elapsed = time.perf_counter() - start_time
        print("%s运行时间为%ss" % (fun.__name__, elapsed))
        return res
    return wrapper
@timeit
def open1():
    """Read /etc/passwd via ``readlines()`` and split every line on ':'."""
    with open("/etc/passwd") as passwd:
        # readlines() loads the entire file into memory as a list of lines,
        # so this approach is only appropriate for small files.
        all_lines = passwd.readlines()
        for entry in all_lines:
            entry.split(":")
@timeit
def open2():
    """Walk /etc/passwd one line at a time and split every line on ':'."""
    with open("/etc/passwd") as passwd:
        # The file object is its own iterator: lines are produced one by one
        # instead of being collected into a list first.
        line_iter = iter(passwd)
        for entry in line_iter:
            entry.split(":")
# Run both variants; the @timeit decorator prints each one's elapsed time.
open1()
open2()
open1运行时间为0.0008792877197265625s
open2运行时间为0.00012445449829101562s
练习
- 生成一个大文件ips.txt,要求1200行, 每行随机为172.25.254.0/24段的ip;
- 读取ips.txt文件统计这个文件中ip出现频率排前10的ip;
# 1.
import random
# Generate the sample data file (e.g. 'ips.txt').
def create_ips_file(filename, line_count=120000):
    """Write ``line_count`` random 172.25.254.0/24 addresses to ``filename``.

    Each line holds one address picked uniformly from 172.25.254.1 through
    172.25.254.254, followed by a newline.

    Args:
        filename: Path of the file to create. An existing file is overwritten.
        line_count: Number of lines to write. Defaults to 120000.
    """
    ips = ['172.25.254.' + str(i) for i in range(1, 255)]
    # Open with 'w', not 'a+': in append mode every re-run added another
    # batch of lines, so the file no longer had the requested line count.
    with open(filename, 'w') as f:
        # random.choice picks a single element directly; writelines batches
        # the output instead of issuing one write() call per line.
        f.writelines(random.choice(ips) + '\n' for _ in range(line_count))
# create_ips_file('ips.txt')
def sorted_by_ip(filename, count=10):
    """Return the ``count`` most frequent IPs in ``filename``.

    The file is expected to hold one IP per line. Surrounding whitespace,
    including the trailing newline, is removed before counting, so the last
    line is counted correctly even when it has no newline. Blank lines are
    ignored.

    Args:
        filename: Path of the text file to read.
        count: Maximum number of entries to return. Defaults to 10.

    Returns:
        A list of ``(ip, occurrences)`` tuples ordered by descending
        occurrences. IPs with equal counts keep the order in which they
        first appear in the file.
    """
    ips_dict = {}
    with open(filename) as f:
        # Iterate the file lazily so large files are not loaded at once.
        for line in f:
            ip = line.strip()
            if not ip:
                continue
            ips_dict[ip] = ips_dict.get(ip, 0) + 1
    # sorted() is stable, also with reverse=True, so ties keep file order.
    ranked = sorted(ips_dict.items(), key=lambda item: item[1], reverse=True)
    return ranked[:count]
# Print the ten most frequent IPs in ips.txt (the file must already exist).
print(sorted_by_ip('ips.txt'))
# 2.
from collections import Counter
def new_method_sorted_by_ip(filename, count=10):
    """Print and return the ``count`` most frequent IPs using ``Counter``.

    The file is expected to hold one IP per line. Surrounding whitespace,
    including the trailing newline, is removed before counting, and blank
    lines are ignored.

    Args:
        filename: Path of the text file to read.
        count: Number of top entries to report. Defaults to 10.

    Returns:
        The list of ``(ip, occurrences)`` tuples that was printed, most
        frequent first.
    """
    with open(filename) as f:
        stripped = (line.strip() for line in f)
        # Counter consumes the generator here, while the file is still open.
        ipcount = Counter(ip for ip in stripped if ip)
    top = ipcount.most_common(count)
    print(top)
    return top
# Run the Counter-based version on the same ips.txt file.
new_method_sorted_by_ip("ips.txt")