在教育辅助平台中,我开发了错题整理和刷题功能,错题整理需要能够从用户上传的题目图片和题解图片中准确识别出内容,刷题功能则需要能够识别用户的答题内容(可能会是手写)
技术选择
经过调研和实践,最终基于有道智云的AI平台提供的教育领域接口开发上述功能。
代码实现
1.接口调用文件pic_rcg.py
import base64
import requests
from .AuthV3Util import addAuthParams
# Your application ID.
# NOTE(review): the credentials below are hard-coded literals; presumably they
# should be loaded from configuration or the environment instead of being
# published with the source — confirm and rotate if they are real.
APP_KEY = '6e043dfa403c2c0a'
# Your application secret.
APP_SECRET = 'HZdQ80OEw87HSMN7HDAheVqdo8QXD1cv'
# Path of the image to recognize, e.g. Windows path: PATH = "C:\\youdao\\picture.jpg"
def createRequest(path):
    """Send the image at ``path`` to the formula OCR endpoint.

    The image file is base64-encoded, bundled with the recognition options,
    signed via ``addAuthParams`` and POSTed as a form to
    ``https://openapi.youdao.com/ocr_formula``.

    Parameter reference:
    https://ai.youdao.com/DOCSIRMA/html/ocr/api/ztsbhgs/index.html

    Args:
        path: Filesystem path of the image to recognize.

    Returns:
        The response body decoded as UTF-8 text.
    """
    payload = {
        # Base64 encoding of the image bytes.
        'img': readFileAsBase64(path),
        # Recognition type: 10011 = formulas restored exactly;
        # 10012 = output suited to question search.
        'detectType': '10011',
        'imageType': '1',
        'docType': 'json',
    }
    # Adds the authentication fields to the payload in place.
    addAuthParams(APP_KEY, APP_SECRET, payload)

    response = doCall(
        'https://openapi.youdao.com/ocr_formula',
        {'Content-Type': 'application/x-www-form-urlencoded'},
        payload,
        'post',
    )
    return response.content.decode('utf-8')
def doCall(url, header, params, method):
    """Issue an HTTP request with ``requests`` and return the response.

    Args:
        url: Target URL.
        header: Dict of HTTP headers to send with the request.
        params: Query parameters (GET) or form fields (POST).
        method: ``'get'`` or ``'post'`` (case-insensitive).

    Returns:
        The ``requests.Response`` object.

    Raises:
        ValueError: If ``method`` is neither ``'get'`` nor ``'post'``.
    """
    verb = str(method).lower()
    if verb == 'get':
        return requests.get(url, params=params, headers=header)
    if verb == 'post':
        # Keyword arguments matter here: the third positional parameter of
        # requests.post is ``json``, so passing the headers positionally
        # would never send them as headers.
        return requests.post(url, data=params, headers=header)
    # Fail loudly instead of returning None and breaking later at the caller.
    raise ValueError("unsupported HTTP method: %r" % (method,))
def readFileAsBase64(path):
    """Return the contents of the file at ``path`` as a base64 string.

    Args:
        path: Filesystem path of the file to read (opened in binary mode).

    Returns:
        The base64 encoding of the file bytes, as a ``str``.
    """
    # The context manager closes the handle even if read() raises.
    with open(path, 'rb') as f:
        data = f.read()
    return str(base64.b64encode(data), 'utf-8')
# NetEase Youdao Zhiyun whole-question recognition (with formulas) API demo
# API endpoint: https://openapi.youdao.com/ocr_formula
if __name__ == '__main__':
    import sys

    # createRequest requires the image path; take it from the command line
    # instead of calling it with no argument (which raises TypeError).
    if len(sys.argv) != 2:
        sys.exit('usage: pic_rcg.py <image_path>')
    print(createRequest(sys.argv[1]))
需要将图片进行base64编码。
createRequest函数是入口函数,调用这个函数就能将图片的base64编码传递给平台,并接收到平台返回的结果;doCall函数只负责实际发送HTTP请求。
2.返回结果处理文件json_process.py
接口的返回结果如下所示
{
"orientation": "",
"angle": null,
"errorCode": "0",
"lines": [
[
{
"boundingBox": "112,86,335,86,335,138,112,138",
"text_height": 49,
"words": [
{
"boundingBox": "112,89,176,87,176,136,112,138",
"word": "根"
},
{
"boundingBox": "182,87,218,87,218,136,182,136",
"word": "据"
},
{
"boundingBox": "224,87,253,86,253,135,224,136",
"word": "题"
},
{
"boundingBox": "259,86,277,86,277,135,259,135",
"word": "意"
},
{
"boundingBox": "282,86,312,86,312,135,282,135",
"word": ","
},
{
"boundingBox": "318,86,335,86,335,135,318,135",
"word": "得"
}
],
"text": "根据题意,得",
"type": "text"
},
{
"boundingBox": "336,68,844,68,844,158,336,158",
"text_height": 84,
"words": [
{
"boundingBox": "336,68,844,74,844,158,336,152",
"word": " 2x - \\frac { 1 } { 3 } ( 91 - x ) = ( 91 - x ) - x"
}
],
"text": "2x - \\frac { 1 } { 3 } ( 91 - x ) = ( 91 - x ) - x",
"type": "formula"
}
],
[
{
"boundingBox": "103,164,611,164,611,223,103,223",
"text_height": 54,
"words": [
{
"boundingBox": "103,164,611,169,611,223,103,218",
"word": " 6x - ( 91 - x ) = 3 ( 91 - x ) - 3x"
}
],
"text": "6x - ( 91 - x ) = 3 ( 91 - x ) - 3x",
"type": "formula"
},
{
"boundingBox": "612,169,625,169,625,224,612,224",
"text_height": 55,
"words": [
{
"boundingBox": "612,169,625,169,625,224,612,224",
"word": ","
}
],
"text": ",",
"type": "text"
}
],
[
{
"boundingBox": "103,229,555,229,555,287,103,287",
"text_height": 53,
"words": [
{
"boundingBox": "103,229,555,234,555,287,103,282",
"word": " 6x - 91 + x = 273 - 3x - 3x"
}
],
"text": "6x - 91 + x = 273 - 3x - 3x",
"type": "formula"
},
{
"boundingBox": "556,227,569,227,569,294,556,294",
"text_height": 67,
"words": [
{
"boundingBox": "556,227,569,227,569,294,556,294",
"word": ","
}
],
"text": ",",
"type": "text"
}
],
[
{
"boundingBox": "103,294,563,294,563,345,103,345",
"text_height": 51,
"words": [
{
"boundingBox": "103,294,563,294,563,345,103,345",
"word": " 6x + x + 3x + 3x = 273 + 91"
}
],
"text": "6x + x + 3x + 3x = 273 + 91",
"type": "formula"
}
],
[
{
"boundingBox": "103,361,284,361,284,410,103,410",
"text_height": 47,
"words": [
{
"boundingBox": "103,361,284,363,284,410,103,408",
"word": " 13x = 364"
}
],
"text": "13x = 364",
"type": "formula"
}
],
[
{
"boundingBox": "98,424,222,424,222,473,98,473",
"text_height": 46,
"words": [
{
"boundingBox": "98,424,222,427,222,473,98,470",
"word": " x = 28"
}
],
"text": "x = 28",
"type": "formula"
}
]
]
}
为了从中拿到可以显示到屏幕上的文本,还需要对返回的json对象进行处理,来提取其中的公式和文本。
处理函数如下
def process_json(json_str):
    """Rebuild the OCR response as a LaTeX ``align`` environment.

    Every recognized line becomes one row of the form ``&<content>\\\\``:
    the leading ``&`` left-aligns the row and the trailing double backslash
    ends it. Within a line, ``text`` elements are appended as-is and
    ``formula`` elements are appended followed by a single space; elements
    of any other type are skipped.

    Two response layouts are accepted: the nested one, where lines live
    under ``Result -> regions[] -> lines``, and a flat one with a top-level
    ``lines`` list.

    Args:
        json_str: JSON text of the OCR response.

    Returns:
        A string starting with ``\\begin{align}`` and ending with
        ``\\end{align}``.

    Raises:
        KeyError: If the expected keys are missing from the response.
    """
    json_obj = json.loads(json_str)

    if "Result" in json_obj:
        regions = json_obj["Result"]["regions"]
    else:
        # Flat layout: treat the top-level "lines" list as a single region.
        regions = [{"lines": json_obj["lines"]}]

    rows = []
    for region in regions:
        for line in region["lines"]:
            pieces = []
            for element in line:
                if element["type"] == "text":
                    pieces.append(element["text"])
                elif element["type"] == "formula":
                    # Trailing space separates the formula from what follows.
                    pieces.append(element["text"] + " ")
            rows.append("&" + "".join(pieces) + "\\\\\n")

    # "\\end" is spelled with an escaped backslash; a bare "\e" is an
    # invalid escape sequence and triggers a warning on newer Pythons.
    return "\\begin{align}\n" + "".join(rows) + "\\end{align}"
首先提取其中所有的lines,然后遍历lines取出所有的element。
因为最终以latex格式进行渲染,所以这里对text的处理是按照latex的格式进行处理,在每行的末尾加上\\来进行换行,同时在左侧加上&进行左对齐。
最终得到的处理结果与下面的形式类似:
\begin{align}
&2.下列运算正确的是\\
&a ^ { 5 } + a ^ { 5 } = a ^ { 10 }\\
&a ^ { \circ } \div a ^ { - 1 } = a\\
&a ^ { 6 } \times a ^ { 4 } = a ^ { 24 }\\
&a ^ { 4 } - a ^ { 4 } = a ^ { 0 }\\
\end{align}