# pip install paddlepaddle paddleocr
import cv2
import numpy as np
from paddleocr import PaddleOCR
import re
# Time of day written as HH:MM:SS, e.g. 16:46:46.
_TIME_RE = re.compile(r'\d{2}:\d{2}:\d{2}')

# Supported date layouts, tried in this order. Every pattern exposes the
# named groups ``year``, ``month`` and ``day`` so normalisation is uniform.
_DATE_RES = (
    # 2024年07月01日 / 2024年07月01
    re.compile(r'(?P<year>\d{4})年(?P<month>\d{1,2})月(?P<day>\d{1,2})日?'),
    # 2024-07-01 / 2024/07/01 / 2024.07.01 (same separator used twice)
    re.compile(
        r'(?<!\d)(?P<year>\d{4})(?P<sep>[-/.])'
        r'(?P<month>\d{1,2})(?P=sep)(?P<day>\d{1,2})'
    ),
    # 07-01-2024 / 07/01/2024 / 07.01.2024, read as month, day, year.
    # The digit guards stop the pattern from carving a "date" out of a
    # longer digit run such as the tail of a year-first date.
    re.compile(
        r'(?<!\d)(?P<month>\d{1,2})(?P<sep>[-/.])'
        r'(?P<day>\d{1,2})(?P=sep)(?P<year>\d{4})(?!\d)'
    ),
)


def _normalize_date(text):
    """Return the first date found in *text* as ``YYYY-MM-DD``, or ``None``."""
    for date_re in _DATE_RES:
        found = date_re.search(text)
        if found:
            year = int(found.group('year'))
            month = int(found.group('month'))
            day = int(found.group('day'))
            return f"{year:04d}-{month:02d}-{day:02d}"
    return None


def extract_dates_and_times(ocr_result):
    """Combine the first date and the first time found in OCR output.

    Args:
        ocr_result: Iterable of per-image results, each an iterable of
            ``(box, (text, score))`` entries. A ``None``/empty result and
            ``None``/empty per-image entries are skipped instead of raising.

    Returns:
        ``"YYYY-MM-DD HH:MM:SS"`` built from the first date and the first
        time encountered (they may come from different text entries), or
        ``None`` when no date or no time was found.
    """
    if not ocr_result:
        return None

    dates = []
    times = []
    for line in ocr_result:
        if not line:
            # NOTE(review): PaddleOCR appears to report an image without
            # any detected text as None -- confirm for the version in use.
            continue
        for _box, (text, score) in line:
            print(f"Text: {text}, Score: {score}")
            # OCR often inserts stray spaces inside a date or time.
            cleaned_text = text.strip().replace(' ', '')
            print('=========cleaned_text:', cleaned_text)
            if not cleaned_text:
                continue

            match_time = _TIME_RE.search(cleaned_text)
            if match_time:
                found_time = match_time.group()
                print('发现时间:', found_time)
                times.append(found_time)

            # Removing spaces glues "2024-07-01 16:46:46" into
            # "2024-07-0116:46:46". Blank out the time before looking for
            # the date so the hour digits cannot be read as part of it.
            date_text = _TIME_RE.sub(' ', cleaned_text)
            found_date = _normalize_date(date_text)
            if found_date:
                dates.append(found_date)

    if dates and times:
        return f"{dates[0]} {times[0]}"
    return None
# Shared OCR engine, created on first use; building one loads the models.
_ocr_engine = None


def _get_ocr_engine():
    """Return the shared PaddleOCR engine, creating it on the first call."""
    global _ocr_engine
    if _ocr_engine is None:
        _ocr_engine = PaddleOCR(use_angle_cls=True, lang='ch')
    return _ocr_engine


def extract_time_from_image(image_path):
    """Run OCR on an image file and return the date/time text found in it.

    Args:
        image_path: Path of the image file to read.

    Returns:
        Whatever ``extract_dates_and_times`` returns for the OCR result of
        the image.

    Raises:
        ValueError: If the image cannot be read or decoded.
    """
    image = cv2.imread(image_path)
    if image is None:
        # cv2.imread signals a missing or undecodable file by returning
        # None rather than raising; fail here with a clear message.
        raise ValueError(f"Could not read image: {image_path!r}")

    result = _get_ocr_engine().ocr(image, cls=True)
    return extract_dates_and_times(result)
if __name__ == "__main__":
    import sys

    # Image path: first command-line argument when given, otherwise the
    # placeholder sample path the script shipped with.
    image_path = sys.argv[1] if len(sys.argv) > 1 else "/..../test1.png"

    time_info = extract_time_from_image(image_path)

    # Report the final result.
    if time_info:
        print(f"Formatted Date and Time: {time_info}")
    else:
        print("No valid date and time found in the image.")
# Extract date information from an image.
# (Blog page footer: latest recommended article published 2024-10-02 19:05:16.)