内存中的数据关闭程序就会消失,所以我们需要将数据存储在硬盘中,这就涉及到了文件的读写。
打开和关闭文件
使用open函数来打开文件,可以指定文件名、操作模式和字符编码
读取文件时,为了防止未知的读取错误,建议用try语句读取,最终finally里关闭文件
读取文本文件
建议用以下方式读取,mode:r表示读取:
import sys
print(sys.getdefaultencoding())
file = open(file="resources/致橡树.txt", mode="r", encoding="utf-8")
try:
# read的参数指定读取的大小,文本文件的话就是字符数
data = file.read(32)
while data:
print(data, end="")
data = file.read(32)
except:
print("读取文件发生错误")
finally:
file.close()
读取二进制文件
可以用seek和tell方法获取文件字节数,读取mode需要指定rb
file = open(file="resources/a.png", mode="rb")
# 将文件指针移动到末尾
file.seek(0, 2)
# 输出文件的字节数,也就是文件大小
print(file.tell())
# 将文件指针移动回去
file.seek(0, 0)
try:
data = file.read(512)
while data:
print(data, end="")
data = file.read(512)
finally:
file.close()
应用1:读取文件的MD5值
from _sha256 import sha256
from hashlib import md5
hasher = md5()
hasher2 = sha256()
file = open("resources/python-3.10.10-amd64.exe", "rb")
try:
data = file.read(512)
while data:
hasher.update(data)
hasher2.update(data)
data = file.read(512)
finally:
file.close()
# 获得16进制的MD5哈希摘要
print(hasher.hexdigest())
print(hasher2.hexdigest())
写入文件
mode参数w,表示写入
file = open("resources/my_song.txt", mode="w", encoding="utf-8")
try:
file.write("白日依山尽\n")
file.write("黄河入海流\n")
finally:
file.close()
mode参数a,表示追加
file = open("resources/my_song.txt", mode="a", encoding="utf-8")
try:
file.write("欲穷千里目\n")
file.write("更上一层楼\n")
finally:
file.close()
上下文语法
用with来读取文件,离开时会自动调用close,就不需要每次都手动写close了
with open("resources/my_song.txt", mode="a", encoding="utf-8") as file:
file.write("欲穷千里目\n")
file.write("更上一层楼\n")
文件复制
def file_copy(source_file, target_file):
with open(source_file, "rb") as source:
with open(target_file, "wb") as target:
data = source.read(512)
while data:
target.write(data)
data = source.read(512)
练习1:将100以内的质数输出到一个文件里
nums = []
for i in range(2, 100):
is_ok = True
for j in range(2, i-1):
if i % j == 0:
is_ok = False
if is_ok:
nums.append(i)
print(nums)
with open("resources/nums.txt", "w", encoding="utf-8") as file:
for num in nums:
file.write(f"{num}\n")
练习2:读取体温数据,判断体温是否正常
with open("resources/data.txt", "r", encoding="utf-8") as file:
with open("resources/temp_res.txt", "w", encoding="utf-8") as file2:
temp_data = file.readline()
while temp_data:
no, temp = temp_data.split()
temp = float(temp)
if 37 < temp < 39:
file2.write(f"{no} {temp} 发热\n")
if temp >= 39:
file2.write(f"{no} {temp} 发热\n")
temp_data = file.readline()
读取CSV文件
with open("resources/sports_scores.csv", encoding="utf-8") as file:
content = file.readline()
while content:
values = content.replace("\n", "").split(",")
print(values)
content = file.readline()
python中有专门的csv模块:
- delimiter参数设置分隔符,默认是逗号
- quotechar默认双引号,去掉包裹字符串的字符
import csv
with open("resources/sports_scores.csv", encoding="utf-8") as file:
reader = csv.reader(file, delimiter=",", quotechar="'")
for row in reader:
print(row)
写入CSV文件
注意,excel软件默认编码是gbk,所以写入时选择gbk
writerow方法也可以用delimiter参数设置分隔符,默认是逗号
import csv
with open("resources/data.txt", "r", encoding="utf-8") as file:
with open("resources/temp_res.csv", "w", encoding="gbk") as file2:
writer = csv.writer(file2)
writer.writerow(["ID", "TEMP", "INFO"])
temp_data = file.readline()
while temp_data:
no, temp = temp_data.split()
temp = float(temp)
if 37 < temp < 39:
writer.writerow([no, temp, "发热"])
if temp >= 39:
writer.writerow([no, temp, "高热"])
temp_data = file.readline()
关于CSV的操作常用的Pandas:read_csv / to_csv
用Python操作Excel
需要借助三方库,推荐这两个:
- xlrd / xlwt / xlutils 效率高,用来读取低版本excel文件
- openpyxl 操作高版本
pip install xlrd xlwt xlutils
pip install openpyxl
用xlrd读取Excel数据 xls
import xlrd
# 返回workbook对象
workbook = xlrd.open_workbook("resources/阿里巴巴2020年股票数据.xls")
# 获取所有sheet的名字
print(workbook.sheet_names())
# 获取指定的sheet
sheet = workbook.sheet_by_name("Sheet1")
# 也可以通过索引去获取 workbook.sheet_by_index(0)
# 获取sheet中总行数和列数
print(sheet.nrows, sheet.ncols)
# 获取某一行的数据,取得的是List对象
print(sheet.row(10))
# 获取某一行的指定列
print(sheet.row_slice(0, start_colx=0, end_colx=3))
# 获取指定列的数据,获取到的是List对象
print(sheet.col(4))
# 获取指定列的指定行
print(sheet.col_slice(4, start_rowx=0, end_rowx=10))
# 获取单元格数据
print(sheet.cell(2, 3))
print(type(sheet.cell(2, 3))) # 获取到的是单元格对象
print(sheet.cell(2, 3).value) # 加上value可以获取到单元格的值
# 遍历整个sheet的数据
for header in sheet.row_values(0):
print(f"{header}", end="\t")
print()
for row in range(1, sheet.nrows):
for col in range(sheet.ncols):
value = sheet.cell(row, col).value
if col == 0:
year, month, date, *_ = xlrd.xldate_as_tuple(value, 0)
print(f"{year}年{month:0>2d}月{date:0>2d}日", end="\t")
elif col == 5:
print(f"{int(value):>10d}", end="\t")
else:
print(f"{value:.2f}", end="\t")
print()
处理时间日期的另一种方式:
curr_date = xlrd.xldate_as_datetime(value, 0)
print(curr_date.strftime("%Y年%m月%d日"), end="\t")
Python中的时间日期类
from datetime import datetime
date1 = datetime(1990, 5, 1)
print(date1)
date2 = datetime.now()
print(date2)
data = date2 - date1
print(data.days) # 两个日期相差多少天
# 格式化
print(date2.strftime("%H:%M:%S %Y年%m月%d日"))
用openpyxl读取Excel数据 xlsx
import openpyxl
workbook = openpyxl.load_workbook("resources/阿里巴巴2020年股票数据.xlsx") # type: openpyxl.Workbook
# 注意上面加了type才能在PyCharm中显示代码提示
# 获取sheet名称
print(workbook.sheetnames)
# 获取指定sheet
# sheet = workbook["Sheet1"]
sheet = workbook.worksheets[0]
print(type(sheet))
print(sheet.dimensions) # 获取表格数据范围 A1:G255
print(sheet.max_row, sheet.max_column)
# 循环遍历所有单元格
for row in range(2, sheet.max_row+1):
for col in range(1, sheet.max_column+1):
value = sheet.cell(row, col).value
if col == 1:
print(value.strftime("%Y年%m月%d日"), end="\t")
elif col == 6:
print(f'{value:<10d}', end="\t")
else:
print(f"{value:.2f}", end="\t")
print()
读取sheet有两种方式
- sheet = workbook[“Sheet1”] 好处是比较直观
- sheet = workbook.worksheets[0] 这样可以显示代码提示
读取单元格也有两种方式
print(sheet.cell(2, 2).value) # 注意,此处取值使用1开始,而不是0
print(sheet["B2"].value)
用比较直观的方式遍历:
for row in range(2, sheet.max_row+1):
for col in "ABCDEFG":
value = sheet[f"{col}{row}"].value
if col == "A":
print(value.strftime("%Y年%m月%d日"), end="\t")
elif col == "F":
print(f'{value:<10d}', end="\t")
else:
print(f"{value:.2f}", end="\t")
print()
另一种遍历方式:先获取每行,再依次获取每个单元格的数据
如果需要对每行数据做处理的话,此种方式比较合理
for row in sheet.rows:
for cell in row:
print(cell.value, end="\t")
print()
用xlwt写Excel
import xlwt
import random
# 1.创建工作簿
wb = xlwt.Workbook()
# 2.添加工作表sheet
sheet = wb.add_sheet("考试成绩") # type:xlwt.Worksheet
# 给表头加格式
header_style = xlwt.XFStyle()
header_pattern = xlwt.Pattern()
header_pattern.pattern = xlwt.Pattern.SOLID_PATTERN
# header_pattern.pattern_back_colour = xlwt.Style.colour_map["green"]
header_pattern.pattern_fore_colour = xlwt.Style.colour_map["aqua"]
header_style.pattern = header_pattern
# 修改字体大小
header_font = xlwt.Font()
header_font.height = 20 * 22 # 字体大小22像素
header_font.bold = True # 加粗选择True
header_style.font = header_font
# 设置对齐
header_alignment = xlwt.Alignment()
header_alignment.vert = xlwt.Alignment.VERT_CENTER # 垂直对齐
header_alignment.horz = xlwt.Alignment.HORZ_CENTER # 左右对齐
header_style.alignment = header_alignment
# 设置边框
borders = xlwt.Borders()
props = (
("top", "top_colour"), ("right", "right_colour"),
("bottom", "bottom_colour"), ("left", "left_colour")
)
for position, color in props:
setattr(borders, position, xlwt.Borders.THIN)
setattr(borders, color, 0)
header_style.borders = borders
# 调整列宽和行高,不调了,好烦orz
# 3.向单元格写入数据
sheet.write(0, 0, "姓名", header_style)
sheet.write(0, 1, "数学", header_style)
sheet.write(0, 2, "语文", header_style)
sheet.write(0, 3, "英语", header_style)
# 写入5个同学的3门课程成绩
names = ["", "Alice", "Bob", "Chris", "Diva", "Eve"]
for row in range(1, 6):
for col in range(4):
if col == 0:
sheet.write(row, col, names[row])
else:
sheet.write(row, col, random.randrange(40, 100))
# 4.保存工作簿
wb.save("resources/考试成绩表.xls")
用openpyxl写Excel文件(xlsx)
文档参考官方:
Tutorial — openpyxl 3.1.2 documentation
import openpyxl
import random
from openpyxl.worksheet.worksheet import Worksheet
# 1.创建工作簿
wb = openpyxl.Workbook()
# 2.添加工作表sheet
# sheet = wb.create_sheet("考试成绩") # type:Worksheet
sheet = wb.active # type:Worksheet
# 建议用active,否则等于会新建一个sheet叫考试成绩
sheet.title = "期末考试成绩"
# 3.向单元格写入数据
sheet.cell(1, 1, "姓名")
sheet.cell(1, 2, "数学")
sheet.cell(1, 3, "语文")
sheet.cell(1, 4, "英语")
# 写入5个同学的3门课程成绩
names = ["", "Alice", "Bob", "Chris", "Diva", "Eve"]
for row in range(2, 7):
for col in range(1, 5):
if col == 1:
sheet.cell(row, col, names[row-1])
else:
sheet.cell(row, col, random.randrange(40, 100))
# 4.保存工作簿
wb.save("resources/考试成绩表.xlsx")
用openpyxl设置格式和公式
import openpyxl
from openpyxl.styles import Font, Alignment, Border, Side
from openpyxl.worksheet.worksheet import Worksheet
# 对齐, 左右对齐和上下对齐
alignment = Alignment(horizontal="center", vertical="center")
# 边框
side = Side(border_style="thin", color="000000")
wb = openpyxl.load_workbook("resources/考试成绩表.xlsx") # type:openpyxl.Workbook
sheet = wb.worksheets[0] # type:Worksheet
# 设置行高和列宽
sheet.row_dimen