基于PyQt5、OCR、文字转语音等实现翻译功能(自动识别语言)、词频统计、词云生成,并支持读取txt文件内容
前言
提示:
基于PyQt5、OCR、文字转语音等实现翻译功能(自动识别语言)
参考链接:
https://blog.csdn.net/m0_46778548/article/details/121042773?ops_request_misc=&request_id=&biz_id=102&utm_term=%E6%9C%89%E9%81%93%E7%BF%BB%E8%AF%91&utm_medium=distribute.pc_search_result.none-task-blog-2~all~sobaiduweb~default-3-121042773.nonecase&spm=1018.2226.3001.4187
https://blog.csdn.net/melo_1/article/details/105036899
用Python破解有道翻译反爬虫机制_南窗客斯黄的博客-CSDN博客_爬虫有道翻译:https://blog.csdn.net/m0_46778548/article/details/121042773?ops_request_misc=&request_id=&biz_id=102&utm_term=%E6%9C%89%E9%81%93%E7%BF%BB%E8%AF%91&utm_medium=distribute.pc_search_result.none-task-blog-2~all~sobaiduweb~default-3-121042773.nonecase&spm=1018.2226.3001.4187
一、程序演示
二、核心代码
1.翻译
代码如下:
def translatefun(self):
    """Translate the left pane's text and show the result in the right pane.

    Reads ``self.leftT.toPlainText()``, posts it to the Youdao web
    translation endpoint with both source and target language set to
    ``AUTO``, and passes the translated text to ``self.rightT.setText``.

    Blank input is ignored without contacting the service.  Any
    ``Exception`` raised along the way is caught and reported on stdout
    instead of propagating to the caller.
    """
    import json
    import urllib.parse
    import urllib.request

    def translate_action(content):
        """Send *content* to the translation endpoint and return the text.

        Returns the translated segments concatenated, with a newline
        appended after each paragraph of the response.
        """
        # Endpoint as shown in the browser's "Request URL" field.
        url = 'http://fanyi.youdao.com/translate?smartresult=dict&smartresult=rule'
        # Browser-like User-Agent (from the browser's request headers) as a
        # simple guard against anti-crawler checks.
        head = {
            'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/80.0.3987.122 Safari/537.36',
        }
        # Form fields copied from the browser's "Form Data" panel.
        # NOTE(review): the 'action' value contains a lowercase 'l' where an
        # 'I' might be expected; it is kept exactly as in the original
        # request data -- confirm before "correcting" it.
        data = {
            'i': content,
            'from': 'AUTO',
            'to': 'AUTO',
            'smartresult': 'dict',
            'client': 'fanyideskweb',
            'salt': '15847864360501',
            'sign': 'f762faa6901c6cf473fce719f8238ca8',
            'ts': '1584786436050',
            'bv': '0ed2e07b89acaa1301d499442c9fdf79',
            'doctype': 'json',
            'version': '2.1',
            'keyfrom': 'fanyi.web',
            'action': 'FY_BY_REALTlME',
        }
        payload = urllib.parse.urlencode(data).encode('utf-8')
        req = urllib.request.Request(url, payload, head)
        # The context manager closes the connection; the timeout keeps a
        # stalled request from blocking the caller forever.
        with urllib.request.urlopen(req, timeout=10) as response:
            html = response.read().decode('utf-8')
        target = json.loads(html)
        print(target['translateResult'])
        pieces = []
        for paragraph in target['translateResult']:
            for segment in paragraph:
                print(segment['tgt'])
                pieces.append(segment['tgt'])
            pieces.append('\n')
        result = ''.join(pieces)
        print(result)
        return result

    try:
        source_text = self.leftT.toPlainText()
        if not source_text.strip():
            # Nothing to translate: skip the network round trip.
            return
        lt = translate_action(source_text)
        self.rightT.setText(str(lt))
    except Exception as exc:
        # Best-effort UI action: report the failure (with its cause) rather
        # than letting it escape.
        print("translatefun 128 err", exc)
2.词频统计
代码如下:
def analyseText(self):
    """Plot a bar chart of the ten most frequent words in the right pane.

    Reads ``self.rightT.toPlainText()``, segments it with ``jieba.lcut``,
    counts every token whose length is not exactly one character, prints
    the ten most frequent tokens with their counts, and shows them as a
    matplotlib bar chart.

    Returns without plotting when the text is blank or when no token is
    counted.
    """
    txt = self.rightT.toPlainText()
    if not txt.strip():
        # Nothing to analyse: avoid segmenting and popping up an empty chart.
        return

    from collections import Counter

    import jieba
    import matplotlib.pyplot as plt

    # Single-character tokens are excluded from the statistics.
    counts = Counter(word for word in jieba.lcut(txt) if len(word) != 1)
    # most_common keeps first-encountered order among equal counts, the same
    # order a stable descending sort of the items gives.
    top_words = counts.most_common(10)
    if not top_words:
        return

    labels = []
    values = []
    for word, freq in top_words:
        print("{}\t\t\t\t{}".format(word, freq))
        labels.append(word)
        values.append(freq)

    # Font family chosen so the Chinese labels below can be rendered.
    plt.rcParams['font.family'] = 'STsong'
    print(labels, values)
    plt.bar(labels, values, color='lightskyblue')
    plt.xlabel('出现词语')
    plt.ylabel('出现次数')
    plt.title('词频统计柱状图')
    plt.show()
3.词云图生成:
def wordimg(self):
    """Render a word cloud from the text in the right pane.

    Reads ``self.rightT.toPlainText()``, segments it with ``jieba.cut`` in
    full mode, joins the tokens with spaces, and displays the resulting
    ``WordCloud`` image with matplotlib (axes hidden).

    Returns without doing anything when the text is blank.
    """
    getText = self.rightT.toPlainText()
    if not getText.strip():
        # WordCloud.generate raises ValueError when there are no words to
        # draw, so bail out before importing the plotting stack.
        return

    import matplotlib.pyplot as plt
    from wordcloud import WordCloud
    import jieba

    tokens = jieba.cut(getText, cut_all=True)
    spaced_text = " ".join(tokens)
    # A Chinese-capable font is required, otherwise the cloud may not show
    # Chinese characters.
    font = r'C:\Windows\Fonts\simfang.ttf'
    cloud = WordCloud(font_path=font).generate(spaced_text)
    plt.imshow(cloud)
    plt.axis("off")  # hide the axes
    plt.show()
4.图片识别文字
def extractText(self):
    """Let the user pick an image, OCR it, and put the text in the left pane.

    Opens ``QFileDialog.getOpenFileName()``, runs ``CnOcr`` on the chosen
    file, and passes the recognised text (one line per OCR result) to
    ``self.leftT.setText``.

    Returns without touching the pane when no file was chosen.
    """
    from cnocr.utils import read_img
    from cnocr import CnOcr

    def exT(imgpath):
        """Return the text recognised in the image at *imgpath*."""
        ocr = CnOcr()
        img = read_img(imgpath)
        res = ocr.ocr(img)
        # Each result item's first element is joined into one output line.
        # NOTE(review): presumably that element is the sequence of
        # recognised characters -- confirm against the installed cnocr
        # version.
        return "".join("".join(item[0]) + "\n" for item in res)

    fname = QFileDialog.getOpenFileName()
    img_path = fname[0]
    if not img_path:
        # The dialog was dismissed without a selection (empty path).
        return
    text = exT(img_path)
    self.leftT.setText(text)
如需整套源码,请点击:python有道翻译、ocr图片转文字、文字转语音、统计词频、生成词云-Python文档类资源-CSDN下载
总结
由于本人能力有限,以后的学习中将继续完善自己的编码能力