#!/usr/bin/env python3
# coding: utf-8
"""Flask test endpoint for Chinese/English phrase auto-correction.

A phrase is first looked up by its pinyin in a pickled dictionary; pure
Chinese input falls back to ``auto_correct_ch`` and pure alphabetic input is
handled by ``auto_correct_en``.
"""
import functools
import html
import pickle

# Kept as a star import (and kept ahead of the explicit imports below) so the
# module namespace and name-shadowing order match the original file.
from pypinyin import *
from flask import Flask, request

import config
from editing_distance import auto_correct_ch, auto_correct_en
from text_utils import is_chinese_string, is_alphabet_string

app = Flask(__name__)


class WordCorrect:
    """Holds the character list and the pinyin lookup dictionary.

    Attributes:
        char_path: Path of the character list file (``config.char_path``).
        model_path: Path of the pickled pinyin dictionary
            (``config.model_path``).
        charlist: Non-empty, stripped lines of the character list file.
        pinyin_dict: Object unpickled from ``model_path``; ``test`` uses it as
            a mapping from a comma-joined pinyin key to a dict of candidates.
    """

    def __init__(self):
        self.char_path = config.char_path
        self.model_path = config.model_path
        # Use a context manager so the file handle is closed deterministically.
        with open(self.char_path, "r", encoding="utf-8") as char_file:
            self.charlist = [line.strip() for line in char_file if line.strip()]
        self.pinyin_dict = self.load_model(self.model_path)

    def load_model(self, model_path):
        """Load and return the pickled object stored at ``model_path``.

        SECURITY: ``pickle.load`` can execute arbitrary code, so the model
        file must come from a trusted source.
        """
        with open(model_path, 'rb') as handle:
            # Warning: if something is added here, saveDataset must be
            # modified as well.
            data = pickle.load(handle)
        return data


@functools.lru_cache(maxsize=1)
def _get_corrector():
    """Build the WordCorrect instance on first use and reuse it afterwards."""
    return WordCorrect()


def _best_pinyin_match(corrector, phrase):
    """Return the highest-valued candidate for the phrase's pinyin, or None."""
    pinyin_key = ','.join(lazy_pinyin(phrase))
    candidates = corrector.pinyin_dict.get(pinyin_key)
    if not candidates:
        # Missing key or empty candidate dict: nothing to choose from.
        return None
    return max(candidates, key=candidates.get)


@app.route('/test', methods=['get', 'post'])
def test():
    """Correct the phrase passed in the ``err_phrase`` request parameter.

    Returns:
        The corrected phrase as the response body, or a 400 response when
        ``err_phrase`` is missing or empty.
    """
    error_phrase = request.values.get('err_phrase')
    if not error_phrase:
        return "missing required parameter 'err_phrase'", 400

    corrector = _get_corrector()

    if is_chinese_string(error_phrase):
        # Pure Chinese: pinyin dictionary first, edit-distance fallback.
        corrected = _best_pinyin_match(corrector, error_phrase)
        if corrected is None:
            corrected = auto_correct_ch(error_phrase)
    elif is_alphabet_string(error_phrase):
        # Pure alphabetic: lower-case once so the logged and the returned
        # value are the same (the original lower-cased only the logged one).
        corrected = auto_correct_en(error_phrase.lower())
    else:
        # Chinese mixed with pinyin: the original computed the lookup but
        # returned nothing, which made Flask fail with a 500.
        corrected = _best_pinyin_match(corrector, error_phrase)
        if corrected is None:
            # No candidate: hand the input back, escaped because the response
            # is served as HTML.
            corrected = html.escape(error_phrase)

    print(corrected)
    return corrected


# Web interface test mode.
if __name__ == '__main__':
    # NOTE: debug=True together with host 0.0.0.0 exposes the interactive
    # debugger to the network; use this for local testing only.
    # Access via http://<ip-address>:5000/test?err_phrase=<phrase>
    app.run(host='0.0.0.0', port=5000, debug=True)
基于编辑距离的中英文自动纠错
该博客介绍了一个基于编辑距离的文本纠错系统,它能处理中英文错误的单词。系统通过拼音转换和编辑距离算法找到最可能的正确词汇,并提供了一个Web接口进行测试。此外,还包含了对汉字、拼音、英文字符的判断辅助函数。
摘要由CSDN通过智能技术生成