提示:文章写完后,目录可以自动生成,如何生成可参考右边的帮助文档
文章目录
PaddleOCR 文本识别
import cv2
import numpy as np
from PIL import Image
from paddleocr import PaddleOCR, draw_ocr
from IPython.display import Image as IPImage, display
import matplotlib.pyplot as plt
import os
import cv2
from matplotlib import pyplot as plt
# image1 = cv2.imread("/home/yangxy/pythonproject/OCR/assets/112.png")
# # 将输入图像转为灰度图
# gray = cv2.cvtColor(image1, cv2.COLOR_BGR2GRAY)
# # 绘制灰度图
# # plt.subplot(311), plt.imshow(gray, "gray")
# # 绘制原图
# plt.subplot(311), plt.imshow(cv2.cvtColor(image1, cv2.COLOR_BGR2RGB))
# plt.title("input image"), plt.xticks([]), plt.yticks([])
# # 对灰度图使用 Ostu 算法
# ret1, image = cv2.threshold(gray, 0, 255, cv2.THRESH_OTSU)
# # 绘制灰度直方图
# plt.subplot(312), plt.hist(gray.ravel(), 256)
# # 标注 Ostu 阈值所在直线
# plt.axvline(x=ret1, color='red', label='otsu')
# plt.legend(loc='upper right')
# plt.title("Histogram"), plt.xticks([]), plt.yticks([])
# # 绘制二值化图像
# plt.subplot(313), plt.imshow(image, "gray")
# plt.title("output image"), plt.xticks([]), plt.yticks([])
# plt.show()
# # 图像颜色再次反转
# reverse=cv2.bitwise_not(image)
# color=cv2.cvtColor(reverse,cv2.COLOR_GRAY2BGR)
# plt.figure(figsize=(6,16))
# plt.imshow(color), plt.xticks([]), plt.yticks([])
# Create a PaddleOCR instance (angle classifier enabled, English model).
ocr = PaddleOCR(use_angle_cls=True, lang="en")
# Path of the input image.
img_path = '/home/yangxy/pythonproject/OCR/assets/112.png'
# Run detection + recognition on the image file.
result = ocr.ocr(img_path, cls=True)
# Load the same image so the detected boxes can be drawn onto it.
image = cv2.imread(img_path)
# Recognised text lines, in detection order.
all_texts = []
# Extract the recognition results, one entry per page.
for idx in range(len(result)):
    res = result[idx]
    # PaddleOCR yields None for a page with no detected text — skip it
    # instead of crashing when iterating.
    if res is None:
        continue
    for line in res:
        # Quadrilateral box coordinates as int32 for cv2 drawing.
        box = np.array(line[0]).astype(np.int32)
        # Recognised text and its confidence score.
        text = line[1][0]
        confidence = line[1][1]
        print(f"文本内容: {text}, 坐标: {box}, 可信度: {confidence:.2f}")
        # Outline the detection in red (BGR) on the image.
        cv2.polylines(image, [box], isClosed=True, color=(0, 0, 255), thickness=2)
        all_texts.append(text)
# Print every recognised line.
print("全部文本内容:")
for text in all_texts:
    print(text)
# Show the annotated image (convert BGR -> RGB for matplotlib).
plt.imshow(cv2.cvtColor(image, cv2.COLOR_BGR2RGB))
plt.axis('off')
plt.show()
表格识别
# 先是输入图像
import cv2
import numpy as np
from PIL import Image
from paddleocr import PaddleOCR
import openpyxl
from matplotlib import pyplot as plt
from IPython.display import display
# # 读取表格图像并显示
# img = cv2.imread('/home/yangxy/pythonproject/OCR/assets/133.png')
# display(Image.open('/home/yangxy/pythonproject/OCR/assets/133.png'))
# Path of the table image; loaded in colour (flag 1 = 3-channel BGR).
img='/home/yangxy/pythonproject/OCR/assets/133.png'
image = cv2.imread(img, 1)
# Grayscale copy for thresholding.
gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
# Invert (~gray) then adaptive-threshold so ink and table lines become
# white on black; block size 35 and constant -5 are tuned for this image.
binary = cv2.adaptiveThreshold(~gray, 255, cv2.ADAPTIVE_THRESH_GAUSSIAN_C, cv2.THRESH_BINARY, 35, -5)
# Convert the binary image to PIL so it can be displayed inline.
pil_binary = Image.fromarray(binary)
display(pil_binary)
# `scale` sets the structuring-element length: a larger scale keeps only
# thicker/longer lines, a smaller scale also picks up finer lines.
rows, cols = binary.shape
scale = 40
# Extract horizontal lines: erode/dilate with a wide, 1-pixel-high kernel.
kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (cols // scale, 1))
eroded = cv2.erode(binary, kernel, iterations=1)
dilatedcol = cv2.dilate(eroded, kernel, iterations=1)
# Extract vertical lines with a tall, 1-pixel-wide kernel (finer scale).
scale =20
kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (1, rows // scale))
eroded = cv2.erode(binary, kernel, iterations=1)
dilatedrow = cv2.dilate(eroded, kernel, iterations=1)
# Grid intersections: pixels present in both line images.
bitwiseAnd = cv2.bitwise_and(dilatedcol, dilatedrow)
# Full table grid: union of horizontal and vertical lines.
merge = cv2.add(dilatedcol, dilatedrow)
# Subtract the grid from the binary image, leaving only cell contents.
merge2 = cv2.subtract(binary, merge)
# Locate the white intersection pixels of the grid; each grid corner shows
# up as a small cluster of nearby pixels.
ys, xs = np.where(bitwiseAnd > 0)

def _cluster_coords(coords, gap=10):
    """Collapse sorted pixel coordinates into one value per grid line.

    Intersection pixels arrive in clusters; a jump larger than `gap`
    pixels marks the boundary between two grid lines, and the last
    coordinate of each cluster is kept.  `gap` roughly corresponds to
    the minimum cell height/width and may need tuning per image.

    Returns a list of ints (empty for empty input — the original code
    raised IndexError on an image with no intersections).
    """
    coords = np.sort(coords)
    if coords.size == 0:
        return []
    kept = [int(coords[i]) for i in range(len(coords) - 1)
            if coords[i + 1] - coords[i] > gap]
    # Keep the final point of the last cluster.  The original loop
    # appended coords[-2] here (off-by-one: the loop index stops at
    # len-2) and carried a no-op `i = i + 1` inside the for loop.
    kept.append(int(coords[-1]))
    return kept

mylistx = _cluster_coords(xs)  # x coordinates of the vertical grid lines
mylisty = _cluster_coords(ys)  # y coordinates of the horizontal grid lines
# Outline every detected table cell on the original image.
for col in range(len(mylistx) - 1):
    left, right = mylistx[col], mylistx[col + 1]
    for line_idx in range(len(mylisty) - 1):
        top, bottom = mylisty[line_idx], mylisty[line_idx + 1]
        # Green rectangle (BGR) around the cell, 2 px thick.
        cv2.rectangle(image, (left, top), (right, bottom), (0, 255, 0), 2)
# Convert BGR -> RGB so PIL shows the colours correctly, then display inline.
display(Image.fromarray(cv2.cvtColor(image, cv2.COLOR_BGR2RGB)))
# PaddleOCR instance for per-cell recognition (detection enabled).
ocr = PaddleOCR(det=True)
# Split the table along the detected row (y) and column (x) grid lines
# and OCR each cell; `mylist` collects one list of cell texts per row.
mylist = []
for i in range(len(mylisty) - 1):
    row = []
    for j in range(len(mylistx) - 1):
        # Crop the cell.  NumPy slicing takes y first, then x; the +3/-3
        # insets shrink the ROI so the grid lines (and the green
        # rectangles drawn on `image` above) do not leak into the OCR input.
        ROI = image[mylisty[i] + 3:mylisty[i + 1] - 3, mylistx[j]:mylistx[j + 1] - 3]
        # Show the cell currently being processed.
        display(Image.fromarray(ROI))
        result = ocr.ocr(ROI, det=True)
        print(result)
        # Cell text starts with a single space so empty cells still get a
        # (near-)blank value in the spreadsheet.
        txt = ' '
        # PaddleOCR reports "no text" as [[]] in older versions and as
        # [None] in newer ones; the original code only checked [[]] and
        # crashed iterating None.  Treat all of these as an empty cell.
        if result and result != [[]] and result[0] is not None:
            for page in result:
                for det_line in page:
                    # Each detection line is [box, (text, confidence)];
                    # only the tuple carries the recognised string.
                    for item in det_line:
                        if isinstance(item, tuple):
                            txt += '\n' + item[0]
        row.append(txt)
    mylist.append(row)
print(mylist)
# Export the recognised table contents to an Excel workbook.
wb = openpyxl.Workbook()
ws = wb.active
ws.title = '我的课程表'
# Write every cell value.  enumerate avoids the original indexing through
# mylist[0], which raised IndexError for an empty table or ragged rows.
# openpyxl rows/columns are 1-based, hence the +1 offsets.
for r, row_values in enumerate(mylist):
    for c, cell_value in enumerate(row_values):
        ws.cell(row=r + 1, column=c + 1, value=cell_value)
file_path='/home/yangxy/pythonproject/OCR/assets/2.xlsx'
# The workbook only hits disk on save().
wb.save(file_path)
print("成功写入文件: " + file_path + " !")