1、文本文件操作
1.1读取文本文件
1.1.1使用 read()
read() 方法用于读取文件的所有内容并将其作为一个字符串返回。
# Read the whole file in a single call; read() returns the content as one string.
with open('example.txt', mode='r') as text_file:
    content = text_file.read()
# The string is still available after the file has been closed.
print(content)
1.1.2使用 readline()
readline() 方法用于一次读取文件的一行。
# Read the file one line at a time with readline().
with open('example.txt', mode='r') as text_file:
    # iter(callable, sentinel) keeps calling readline() until it returns '',
    # which is what readline() yields at end of file.
    for line in iter(text_file.readline, ''):
        # Each line already ends with its own newline, so suppress print's.
        print(line, end='')
1.1.3使用 readlines()
readlines() 方法用于读取文件的所有行,并将它们作为一个字符串列表返回。
# Read every line at once; readlines() returns a list of strings.
with open('example.txt', mode='r') as text_file:
    lines = text_file.readlines()
# Every element keeps its trailing newline, so joining them reproduces the
# file text and print must not append another newline.
print(''.join(lines), end='')
注意:在使用 print 函数输出每一行时,我们使用了 end='' 参数来防止 print 函数在每一行的末尾再添加一个换行符,因为从文件中读取的每一行已经包含了换行符。
1.2写入文本文件
1.2.1使用 write()
# Write a single string with write(); mode 'w' truncates any existing file.
greeting = "Hello, World!"
with open('example.txt', mode='w') as text_file:
    text_file.write(greeting)
1.2.2使用 writelines()
# Write several lines with writelines().
lines = ["Hello, World", "Welcome to Python programming"]
# writelines() does not add line separators, so terminate each entry first.
terminated = [f"{text}\n" for text in lines]
with open('example.txt', mode='w') as text_file:
    text_file.writelines(terminated)
1.3open模式
1.4综合案例
日志文件读写
写一段代码,模拟生成accuracy逐步上升、loss逐步下降的训练日志,并将日志信息记录到training_log.txt中
# 写一段代码,模拟生成accuracy逐步上升、loss逐步下降的训练日志,并将日志信息记录到training_log.txt中
import random
# Number of epochs to simulate, plus the starting accuracy and loss.
epoch = 100
accuracy = 0.5
loss = 0.9
# Record one tab-separated row per epoch in a plain-text log file.
with open("training_log.txt", 'w') as log_file:
    log_file.write("Epoch\tAccuracy\tLoss\n")
    for epoch_i in range(1, epoch + 1):
        # Move accuracy up and loss down by a small random step, then clamp
        # accuracy to at most 1 and loss to at least 0.
        accuracy = min(1, accuracy + random.uniform(0, 0.005))
        loss = max(0, loss - random.uniform(0, 0.005))
        # The file gets values rounded to three decimals ...
        row = f"{epoch_i}\t{accuracy:.3f}\t{loss:.3f}\n"
        log_file.write(row)
        # ... while the console shows the unrounded values.
        print(f"Epoch:{epoch_i}, Accuracy:{accuracy}, Loss:{loss}")
2、pandas 表格数据处理
安装pandas
pip install pandas
2.1pandas 表格读取与数据处理
2.1.1加载表格
读取 csv 表格
import pandas as pd
# Load a CSV file into a DataFrame and display it.
csv_path = "resources/yolov5s.csv"
data = pd.read_csv(csv_path)
print(data)
jupyter notebook 输出结果
读取 excel 表格
import pandas as pd
# Load an Excel workbook into a DataFrame and display it.
excel_path = "resources/销售数据.xlsx"
data = pd.read_excel(excel_path)
print(data)
2.1.2基础数据分析
获取数据基本信息
import pandas as pd
# Print summary statistics of the table via DataFrame.describe().
excel_path = "resources/销售数据.xlsx"
data = pd.read_excel(excel_path)
summary = data.describe()
print(summary)
输出头几行
import pandas as pd
# Show the first rows of the table; head() returns 5 rows by default.
excel_path = "resources/销售数据.xlsx"
data = pd.read_excel(excel_path)
first_rows = data.head()
print(first_rows)
输出尾几行
import pandas as pd
# Show the last rows of the table; tail() returns 5 rows by default.
excel_path = "resources/销售数据.xlsx"
data = pd.read_excel(excel_path)
last_rows = data.tail()
print(last_rows)
定位第一行
import pandas as pd
excel_path = "resources/销售数据.xlsx"
data = pd.read_excel(excel_path)
# Select the row whose index label is 0 (the first row of the sheet).
first_row_label = 0
data_0 = data.loc[first_row_label]
print(data_0)
定位指定列
import pandas as pd
excel_path = "resources/销售数据.xlsx"
data = pd.read_excel(excel_path)
# Select every row of the "大类编码" column.
column_name = "大类编码"
data_1 = data.loc[:, column_name]
print(data_1)
通过指定行和列,定位指定元素
import pandas as pd
excel_path = "resources/销售数据.xlsx"
data = pd.read_excel(excel_path)
# Select a single cell: the row labelled 1, column "大类编码".
row_label, column_name = 1, "大类编码"
data_2 = data.loc[row_label, column_name]
print(data_2)
对行和列使用切片
import pandas as pd
excel_path = "resources/销售数据.xlsx"
data = pd.read_excel(excel_path)
# Select a rectangular region: row labels 1 through 3 and the columns from
# "大类编码" through "中类名称". Label slices with .loc include both ends.
row_range = slice(1, 3)
column_range = slice("大类编码", "中类名称")
data_3 = data.loc[row_range, column_range]
print(data_3)
提取符合要求的元素
import pandas as pd
excel_path = "resources/销售数据.xlsx"
data = pd.read_excel(excel_path)
# Conditional selection: build a boolean mask, then keep every row (with all
# of its columns) whose "销售数量" value is greater than 10.
sold_more_than_ten = data["销售数量"] > 10
data_4 = data.loc[sold_more_than_ten]
print(data_4)
import pandas as pd
excel_path = "resources/销售数据.xlsx"
data = pd.read_excel(excel_path)
# Conditional selection combined with a column list: keep the rows whose
# "销售数量" exceeds 10, but only the two listed columns.
sold_more_than_ten = data["销售数量"] > 10
wanted_columns = ["小类编码", "小类名称"]
data_5 = data.loc[sold_more_than_ten, wanted_columns]
print(data_5)
数据分组与排序
import pandas as pd
file_loc = "resources/销售数据.xlsx"
data = pd.read_excel(file_loc)
# Total "销售金额" per "商品类型"; reset_index() turns the group key back
# into an ordinary column so the result is a two-column DataFrame.
data_extract = data.groupby('商品类型')['销售金额'].sum().reset_index()
print(data_extract)
2.1.3表格数据保存
将处理好的数据保存成csv或excel
import pandas as pd
file_loc = "resources/销售数据.xlsx"
data = pd.read_excel(file_loc)
# Aggregate total "销售金额" per "商品类型" into a flat two-column table.
data_extract = data.groupby('商品类型')['销售金额'].sum().reset_index()
# Save the result in both formats; index=False leaves out the row index.
# The CSV is written with GBK encoding.
data_extract.to_csv("处理好的表格.csv", index=False, encoding='gbk')
data_extract.to_excel("处理好的表格.xlsx", index=False)
3、Matplotlib画图
安装Matplotlib
pip install matplotlib
3.1绘制折线图(plot)
小demo
import numpy as np
import matplotlib.pyplot as plt
# Sample x from -2π up to (but excluding) 2π in steps of 0.01 and evaluate
# sin(x) at every sample.
two_pi = 2 * np.pi
x = np.arange(-two_pi, two_pi, 0.01)
y = np.sin(x)
# Draw the curve on a new figure.
plt.figure()
plt.plot(x, y)
# Decorate the axes: grid, axis labels and title.
plt.grid(True)
plt.xlabel('x')
plt.ylabel('y')
plt.title("sin(x)")
# Display the figure.
plt.show()
对csv数据进行绘图
import pandas as pd
import matplotlib.pyplot as plt
data_loc = 'resources/yolov5s.csv'
# index_col=0 makes the first CSV column the row index.
data = pd.read_csv(data_loc, index_col=0)
# Tip: print(data.columns) lists the available column labels. The label used
# below starts with a space character.
train_bbox_loss = data[' train/box_loss']
# Plot the loss against a simple 0..n-1 counter.
x_list = list(range(len(train_bbox_loss)))
plt.plot(x_list, train_bbox_loss)
plt.title("YOLOv5s")
plt.xlabel('Epoch')
plt.ylabel('Loss')
plt.grid(True)
plt.show()
同时绘制出多个折线图
import pandas as pd
import matplotlib.pyplot as plt
# Map each legend label to the CSV file holding that run's results.
result_files = {
    "yolov5l": "resources/yolov5l.csv",
    "yolov5m": "resources/yolov5m.csv",
    "yolov5s": "resources/yolov5s.csv",
}
for model_name, csv_path in result_files.items():
    # The column label starts with a space character.
    box_loss = pd.read_csv(csv_path)[' train/box_loss']
    # Give every curve its own x range: reusing the first file's length for
    # all curves raises ValueError as soon as two runs differ in length.
    # Attaching the label here keeps legend entries tied to the right curve.
    plt.plot(range(len(box_loss)), box_loss, label=model_name)
plt.xlabel("Epoch")
plt.ylabel("Loss")
plt.title("Train box_loss")
plt.grid()
plt.legend()
plt.show()
3.2绘制散点图(scatter)
import matplotlib.pyplot as plt
import numpy as np
# Generate random demo data. The draws happen in the order x, y, colour,
# size, transparency.
num_points = 100
x = np.random.rand(num_points)
y = np.random.rand(num_points)
colors = np.random.rand(num_points)
sizes = np.random.rand(num_points) * 1000
alphas = np.random.rand(num_points)
# Scatter plot with per-point colour, marker size and transparency; colour
# values are mapped through the 'viridis' colormap.
plt.scatter(x, y, s=sizes, c=colors, cmap='viridis', alpha=alphas)
# Add a colour bar, then display the figure.
plt.colorbar()
plt.show()
3.3绘制柱状图(bar)
import matplotlib.pyplot as plt
import numpy as np
# Demo data: one value per category label.
labels = ["A", "B", "C", "D", "E"]
values = [3, 7, 2, 5, 8]
# Bars are placed at integer positions 0..n-1.
x = np.arange(len(labels))
# Draw the bars.
plt.bar(x, values, align='center', color='blue', alpha=0.7)
# Replace the numeric tick positions with the category labels.
plt.xticks(x, labels)
# Axis labels and chart title.
plt.xlabel('Labels')
plt.ylabel('Values')
plt.title("Simple Bar Chart")
# Display the figure.
plt.show()
import pandas as pd
import matplotlib.pyplot as plt
import matplotlib
# Use the SimHei font so Chinese text renders, and keep the minus sign
# displaying correctly.
matplotlib.rcParams.update({
    'font.sans-serif': ['SimHei'],
    'axes.unicode_minus': False,
})
file_loc = 'resources/销售数据.xlsx'
data = pd.read_excel(file_loc)
print(data)
# Total "销售金额" per "大类名称", sorted ascending by amount and renumbered.
totals = data.groupby('大类名称')['销售金额'].sum().reset_index()
data_extract = totals.sort_values('销售金额', ascending=True).reset_index(drop=True)
print(data_extract)
# Category names serve as the x-axis labels.
x_labels = data_extract['大类名称']
bars = plt.bar(x_labels, data_extract['销售金额'], tick_label=x_labels)
# Rotate the tick labels so long names stay readable.
plt.xticks(rotation=45)
# Annotate every bar with its height, centred just above the bar top.
for bar in bars:
    height = bar.get_height()
    center_x = bar.get_x() + bar.get_width() / 2
    plt.text(center_x, height, round(height, 2), ha='center', va='bottom')
plt.show()
3.4绘制饼状图(pie)
import matplotlib.pyplot as plt
# Demo data: size, label and colour of every wedge.
sizes = [15, 30, 45, 10]
labels = ["A", "B", "C", "D"]
colors = ["yellow", "red", "green", "orange"]
# Offset only the first wedge from the centre to highlight it.
explode = (0.1, 0, 0, 0)
# Draw the pie chart with a percentage label on each wedge.
plt.pie(
    sizes,
    labels=labels,
    colors=colors,
    explode=explode,
    autopct="%1.1f%%",
    startangle=140,
    shadow=True,
)
# Equal axis scaling keeps the pie circular.
plt.axis('equal')
# Display the figure.
plt.show()
import pandas as pd
import matplotlib.pyplot as plt
import matplotlib
# Use the SimHei font so Chinese text renders, and keep the minus sign
# displaying correctly.
matplotlib.rcParams.update({
    'font.sans-serif': ['SimHei'],
    'axes.unicode_minus': False,
})
file_loc = 'resources/销售数据.xlsx'
data = pd.read_excel(file_loc)
print(data)
# Total "销售金额" per "商品类型" as a flat two-column table.
data_extract = data.groupby('商品类型')['销售金额'].sum().reset_index()
# Pull out the amounts and the "商品类型" names used as wedge labels.
category_names = data_extract['商品类型']
sales_amounts = data_extract['销售金额']
# Share of the grand total contributed by each category.
grand_total = sales_amounts.sum()
sales_proportions = sales_amounts / grand_total
# Draw the pie chart.
fig1, ax1 = plt.subplots()
ax1.pie(sales_proportions, labels=category_names, startangle=90, autopct='%1.1f%%')
plt.show()
4、OpenCV 影像数据处理
安装OpenCV
pip install opencv-python==4.5.3.56
4.1图像数据处理
4.1.1加载图片与展示图片
import cv2
img_path = "4.opencv_demos/resources/food.png"
# Read the same file once in colour mode and once in grayscale mode.
image_color = cv2.imread(img_path, cv2.IMREAD_COLOR)
image_gray = cv2.imread(img_path, cv2.IMREAD_GRAYSCALE)
# cv2.imread signals failure by returning None rather than raising, so check
# before handing the result to imshow (same guard as the resize example).
if image_color is None or image_gray is None:
    print("ERROR: Could not load image.")
    exit()
# Show both versions in separate windows.
cv2.imshow("Color Image", image_color)
cv2.imshow("Grayscale Image", image_gray)
# Wait for any key press, then close the windows.
cv2.waitKey(0)
cv2.destroyAllWindows()
4.1.2图片缩放
import cv2
# Load the picture (colour mode is the default).
image = cv2.imread('4.opencv_demos/resources/food.png')
# Stop early when the picture could not be loaded.
if image is None:
    print("ERROR: Could not load image.")
    exit()
# shape starts with (height, width); print it and keep those two values.
print(image.shape)
original_height, original_width = image.shape[0], image.shape[1]
# Target size: half of each dimension, rounded down.
new_width = original_width // 2
new_height = original_height // 2
# cv2.resize takes the target size as (width, height); INTER_AREA is the
# interpolation method used here.
target_size = (new_width, new_height)
resized_image = cv2.resize(image, target_size, interpolation=cv2.INTER_AREA)
# Show the original next to the resized picture.
cv2.imshow("Original Image", image)
cv2.imshow("Resized Image", resized_image)
# Wait for any key press, then close the windows.
cv2.waitKey(0)
cv2.destroyAllWindows()
4.1.3图像旋转
import cv2
# Load the picture; cv2.imread returns None when loading fails, so stop
# early instead of passing None to cv2.rotate.
image = cv2.imread("4.opencv_demos/resources/food.png")
if image is None:
    print("ERROR: Could not load image.")
    exit()
# Rotate in multiples of 90 degrees with cv2.rotate().
rotated_90 = cv2.rotate(image, cv2.ROTATE_90_CLOCKWISE)  # 90 degrees clockwise
rotated_180 = cv2.rotate(image, cv2.ROTATE_180)  # 180 degrees
# 90 degrees counter-clockwise is the same as 270 degrees clockwise.
rotated_270 = cv2.rotate(image, cv2.ROTATE_90_COUNTERCLOCKWISE)
cv2.imshow("original", image)
cv2.imshow("90 degree", rotated_90)
cv2.imshow("180 degree", rotated_180)
cv2.imshow("270 degree", rotated_270)
# Wait for any key press, then close the windows like the other examples do.
cv2.waitKey(0)
cv2.destroyAllWindows()
4.1.4色彩转换
import cv2
def convert_and_show_image(image_path):
    """Show an image together with its conversions to other colour spaces.

    Loads the image at ``image_path``; when loading fails a message is
    printed and the function returns. Otherwise the original and its Gray,
    HSV, HLS, Lab and YCrCb conversions are each shown in their own window
    until a key is pressed.
    """
    image = cv2.imread(image_path)
    if image is None:
        print(f"无法加载图像: {image_path}")
        return

    # Show the unmodified picture first.
    cv2.imshow('Original Image', image)

    # Target colour spaces paired with the matching cvtColor conversion code.
    conversions = (
        ('Gray', cv2.COLOR_BGR2GRAY),
        ('HSV', cv2.COLOR_BGR2HSV),
        ('HLS', cv2.COLOR_BGR2HLS),
        ('Lab', cv2.COLOR_BGR2Lab),
        ('YCrCb', cv2.COLOR_BGR2YCrCb),
    )
    for space_name, conversion_code in conversions:
        cv2.imshow(f'{space_name} Image', cv2.cvtColor(image, conversion_code))

    # Wait for any key press, then close every window.
    cv2.waitKey(0)
    cv2.destroyAllWindows()


# Run the demo on the sample picture.
convert_and_show_image('resources/food.png')
4.1.5保存图像
import cv2
# Load the picture; cv2.imread returns None when loading fails.
image = cv2.imread("4.opencv_demos/resources/food.png")
if image is None:
    print("无法读取图像")
# cv2.imwrite reports failure through its boolean return value instead of
# raising, so check it rather than assuming the file was written.
elif not cv2.imwrite('output_image.png', image):
    print("无法保存图像")
4.2视频数据处理
4.2.1读取摄像头
import cv2
# Create a VideoCapture object; index 0 selects the default camera.
cap = cv2.VideoCapture(0)
while True:
    # ret is True when a frame was grabbed; frame holds the picture.
    ret, frame = cap.read()
    # Leave the loop when no frame is available (camera missing or
    # disconnected). Without this the loop would spin forever with no window
    # to receive the "q" key press.
    if not ret:
        print("Failed to grab frame.")
        break
    cv2.imshow("Frame", frame)
    # waitKey(1) waits about 1 ms between frames; the low byte of its result
    # is compared with the key code of "q" to let the user quit.
    if cv2.waitKey(1) & 0xFF == ord('q'):
        break
# Release the camera and close the window.
cap.release()
cv2.destroyAllWindows()
import cv2
# Create a VideoCapture object; passing a path opens that video file.
cap = cv2.VideoCapture("resources/piano.mp4")
while True:
    ret, frame = cap.read()
    # ret turns False once the video has ended (or could not be opened);
    # stop then, otherwise the loop would keep running after the last frame.
    if not ret:
        break
    cv2.imshow("Frame", frame)
    # The waitKey delay (15 ms) also determines the playback speed; pressing
    # "q" quits early.
    if cv2.waitKey(15) & 0xFF == ord('q'):
        break
# Release the video and close the window.
cap.release()
cv2.destroyAllWindows()
4.2.2保存视频
import cv2
# Open the default camera.
cap = cv2.VideoCapture(0)
# Stop early when the camera could not be opened.
if not cap.isOpened():
    print("ERROR: could not open camera.")
    exit()
# The output video uses the camera's own frame size as (width, height).
frame_width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
frame_height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
frame_size = (frame_width, frame_height)
# Codec ("mp4v"; "XVID" is an alternative) and output file, written at 20 fps.
fourcc = cv2.VideoWriter_fourcc(*"mp4v")
out = cv2.VideoWriter('output.mp4', fourcc, 20.0, frame_size)
while True:
    ret, frame = cap.read()
    if not ret:
        # No frame could be grabbed.
        print("Failed to grab frame.")
        break
    # Append the frame to the output file and preview it.
    out.write(frame)
    cv2.imshow("frame", frame)
    # Press "q" to stop recording.
    pressed_key = cv2.waitKey(1) & 0xFF
    if pressed_key == ord("q"):
        break
# Release the camera and the writer, then close the window.
cap.release()
out.release()
cv2.destroyAllWindows()
4.3综合案例:OpenCV视频画面处理
用OpenCV打开一段视频,将每一帧画面压缩成540p,对画面进行垂直翻转,转为黑白,然后添加高斯噪声,把处理好的每一帧画面保存成一个mp4文件
import cv2
import numpy as np
def add_gaussian_noise(image, mean=0, sigma=15):
    """Return a copy of ``image`` with additive Gaussian noise.

    Args:
        image: NumPy array of pixel values. Any shape is accepted, so both
            2-D grayscale and 3-D colour images work (the previous version
            unpacked exactly two dimensions and failed on colour input).
        mean: Mean of the noise distribution.
        sigma: Standard deviation of the noise distribution.

    Returns:
        A ``uint8`` array with the same shape as ``image``.
    """
    # Draw one noise sample per array element.
    gauss = np.random.normal(mean, sigma, image.shape)
    noisy = image + gauss
    # Clamp to the valid 8-bit range before converting: values below 0
    # become 0 and values above 255 become 255.
    noisy_img = np.clip(noisy, 0, 255)
    return noisy_img.astype(np.uint8)
# Input and output video file names.
input_video = "4.opencv_demos/resources/outdoor.mp4"
output_video = "output.mp4"
# Open the input video and stop early when it cannot be read; otherwise the
# reported frame height is 0 and the size calculation below divides by zero.
cap = cv2.VideoCapture(input_video)
if not cap.isOpened():
    print("ERROR: could not open video.")
    exit()
# Keep the frame rate as reported: truncating it with int() would turn a
# fractional rate such as 29.97 into 29 and change the playback speed.
fps = cap.get(cv2.CAP_PROP_FPS)
frame_width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
frame_height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
# New frame size: 540 pixels high, width scaled to keep the aspect ratio.
new_height = 540
new_width = int((new_height / frame_height) * frame_width)
# Video writer for single-channel (grayscale) frames.
fourcc = cv2.VideoWriter_fourcc(*"mp4v")
out = cv2.VideoWriter(output_video, fourcc, fps,
                      (new_width, new_height), isColor=False)
while True:
    ret, frame = cap.read()
    if not ret:
        break
    # Resize the frame.
    frame = cv2.resize(frame, (new_width, new_height))
    # Convert to grayscale.
    frame = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
    # Vertical flip as the task requires: flipCode 0 flips around the x-axis
    # (top and bottom swap). flipCode 1 would mirror left and right instead.
    frame = cv2.flip(frame, 0)
    # Add Gaussian noise.
    frame = add_gaussian_noise(frame)
    # Append the processed frame to the output video.
    out.write(frame)
# Release the resources.
cap.release()
out.release()
cv2.destroyAllWindows()
5、pickle文件操作
5.1保存数据与加载数据
import pickle
# Sample data to serialise.
data = dict(
    name="john",
    age=30,
    is_student=False,
    grades=[85, 90, 78, 92],
)
pickle_path = 'data.pkl'
# Save the object with pickle; pickle files are opened in binary mode.
with open(pickle_path, 'wb') as sink:
    pickle.dump(data, sink)
# Load the object back from the same file and display it.
with open(pickle_path, 'rb') as source:
    loaded_data = pickle.load(source)
print(loaded_data)
5.2综合案例
当训练人工智能算法时,往往需要很长的时间。如果能提前做好权重保存功能,训练中断时就能很快地恢复上次进度,继续进行训练。
import pickle
import time
import os
import numpy as np
# Simulate a slow weight computation.
def calculate_weights(shape=(10, 10), delay=5):
    """Simulate a time-consuming weight computation.

    Args:
        shape: Shape of the weight array to generate. Defaults to (10, 10).
        delay: Seconds to sleep to imitate the expensive work. Defaults to 5;
            pass 0 to skip the wait.

    Returns:
        A NumPy array of the requested shape filled with random values from
        ``np.random.rand``.
    """
    print("开始计算权重......")
    time.sleep(delay)  # stand-in for the expensive computation
    weights = np.random.rand(*shape)
    print("权重计算完成.")
    return weights
# Save the weights and the epoch to a file.
def save_weights(weights, epoch, filename="weights.pkl"):
    """Pickle ``weights`` and ``epoch`` into ``filename``.

    The data is written to a temporary file next to the target first and
    then moved into place with ``os.replace``. If the program is interrupted
    while writing, the previous checkpoint stays intact instead of being
    left half-written.

    Args:
        weights: Weights object to store.
        epoch: Epoch number stored alongside the weights.
        filename: Path of the checkpoint file.
    """
    data = {"weights": weights, "epoch": epoch}
    tmp_filename = f"{filename}.tmp"
    with open(tmp_filename, 'wb') as file:
        pickle.dump(data, file)
    # Replace the old checkpoint only after the new one is fully written.
    os.replace(tmp_filename, filename)
    print(f"权重和epoch已保存到{filename}.")
# Load the weights and the epoch from a file.
def load_weights(filename="weights.pkl"):
    """Load a checkpoint written by ``save_weights``.

    Args:
        filename: Path of the checkpoint file.

    Returns:
        A ``(weights, epoch)`` tuple taken from the stored dictionary.
    """
    # NOTE: pickle.load can execute arbitrary code while unpickling; only
    # load checkpoint files that this program wrote itself.
    with open(filename, 'rb') as file:
        data = pickle.load(file)
    print(f"权重和epoch已从{filename}加载.")
    return data["weights"], data["epoch"]
# Main program.
def main():
    """Run the simulated training loop, resuming from a checkpoint if present.

    When the checkpoint file exists, the weights and the stored epoch are
    loaded and training continues with the following epoch. Otherwise the
    weights are computed from scratch and training starts at epoch 0. A
    checkpoint is saved after every epoch.
    """
    weights_file = "weights.pkl"
    total_epochs = 100  # assume 100 epochs of training in total

    if os.path.exists(weights_file):
        weights, last_epoch = load_weights(weights_file)
        # The checkpoint is written after an epoch has finished, so the
        # stored value is the last completed epoch. Resume with the next one
        # instead of repeating it.
        start_epoch = last_epoch + 1
    else:
        # No checkpoint yet: compute the weights and start from epoch 0.
        weights = calculate_weights()
        start_epoch = 0

    # Train the remaining epochs.
    for epoch in range(start_epoch, total_epochs):
        print(f"开始训练epoch {epoch}...")
        # The real training code would go here.
        time.sleep(1)  # stand-in for the training work
        print(f"完成训练epoch {epoch}.")
        # Save the weights and the epoch once the epoch has finished.
        save_weights(weights, epoch, weights_file)


if __name__ == "__main__":
    main()