基于Python的简易文本分析程序

ym_SYSU

已于 2023-06-23 16:07:40 修改

阅读量899

点赞数

文章标签： python 开发语言

于 2023-06-23 10:48:19 首次发布

本文链接：https://blog.csdn.net/ym_SYSU/article/details/131348475

版权

"""
项目介绍

情景假设：一位考生根据题目要求写出了一份作文，考生略做修改后提交第二份文档，
        考生将题目、两份作文输入程序后得到一份评分报告。评分报告给出以百分数为形式的数据为考生提供修改建议。
         
实现功能：该项目主要关注以下几点：主题契合度、词汇高级度、查重率与体检报告。
        考生输入文本后，评分报告以pyGUI的形式呈现给用户，显示各类指标、图表和建议。
"""

from tkinter import *
from tkinter import scrolledtext
import nltk
from sklearn.feature_extraction.text import TfidfVectorizer
from tkinter import *
from tkinter import scrolledtext
import nltk
from sklearn.feature_extraction.text import TfidfVectorizer
import matplotlib
matplotlib.use("TkAgg")
import re
import hashlib
import pandas as pds
import numpy as np
import string
import matplotlib.pyplot as plt
import matplotlib.font_manager as fm
import os
import matplotlib.pyplot as plt
import re
import hashlib
import pandas as pds
import numpy as np
import string
import matplotlib.pyplot as plt
import matplotlib.font_manager as fm
import time



"""
项目介绍

情景假设：一位考生根据题目要求写出了一份作文，考生略做修改后提交第二份文档，
        考生将题目、两份作文输入程序后得到一份评分报告。评分报告给出以百分数为形式的数据为考生提供修改建议。
         
实现功能：该项目主要关注以下几点：主题契合度、词汇高级度、查重率与体检报告。
        考生输入文本后，评分报告以pyGUI的形式呈现给用户，显示各类指标、图表和建议。
"""





# Load the vocabulary workbook into a DataFrame when the script starts.
# NOTE(review): this module-level name is not referenced by the code visible in this file.
word_storage = pds.read_excel('词库.xlsx')
"""
从不同词库中提取单词
词库有四个，分别为：初中词汇，高中词汇，四六级词汇，托福词汇
"""


"""
TF-IDF计算词频以体现主题契合度，徐铭阳提供
"""
def TF_IDF():
    """Measure how strongly each essay echoes the topic's keywords via TF-IDF.

    Reads ``topic.txt``, ``text1.txt`` and ``text2.txt`` from the working
    directory, ranks the terms of each file by TF-IDF weight, scores each
    essay as the sum over shared terms of (topic weight * essay weight), and
    prints the relative change of that score from text1 to text2.

    Returns:
        list[dict]: three dicts mapping the ten highest-weighted terms to
        their weight, for the topic, text1 and text2 (in that order).
    """

    def word_tfidf_calculating(the_address_of_text):
        """Return ``(term, weight)`` pairs of one file, highest weight first."""
        with open(the_address_of_text) as f:
            text = f.read()

        tfidf = TfidfVectorizer(stop_words='english', tokenizer=None)
        tfs = tfidf.fit_transform([text])

        feature_names = tfidf.get_feature_names_out()
        scores = tfs.todense().tolist()[0]
        return sorted(zip(feature_names, scores), key=lambda x: x[1], reverse=True)

    keyword_topic = word_tfidf_calculating("topic.txt")
    keyword_text1 = word_tfidf_calculating("text1.txt")
    keyword_text2 = word_tfidf_calculating("text2.txt")

    def the_top_word_output(which_text):
        """Return the ten highest-weighted terms as a ``{term: weight}`` dict."""
        return dict(which_text[:10])

    top_word_topic = the_top_word_output(keyword_topic)
    top_word_text1 = the_top_word_output(keyword_text1)
    top_word_text2 = the_top_word_output(keyword_text2)

    # Terms are unique per document, so a dict lookup replaces the former
    # linear scan that leaked its result through a module-level global.
    topic_weights = dict(keyword_topic)

    def the_total_score_of_keyword(which_text):
        """Sum topic-weight * essay-weight over the terms both texts share."""
        essay_weights = dict(which_text)
        shared_terms = topic_weights.keys() & essay_weights.keys()
        return sum(topic_weights[term] * essay_weights[term] for term in shared_terms)

    score_of_text1 = the_total_score_of_keyword(keyword_text1)
    score_of_text2 = the_total_score_of_keyword(keyword_text2)

    if score_of_text1:
        # The message reports an improvement, so the figure must be positive
        # when text2 scores higher than text1 (previously the sign was inverted).
        the_ratio_after_modified = score_of_text2 / score_of_text1 - 1
        print("您的文章修改后，关键词复现词频得分提高了{:%}".format(the_ratio_after_modified))
    else:
        # No shared keywords in text1: a relative change is undefined.
        print("原文与题目没有共同关键词，无法计算关键词复现词频得分的变化")
    return [top_word_topic, top_word_text1, top_word_text2]

# Run the TF-IDF analysis at start-up; x holds the three top-keyword dicts (topic, text1, text2).
x = TF_IDF()


"""
SimHash—Hamming算法比较文本相似度，以计算查重率，徐铭阳提供
"""
def CALC_repetition(text1_address, text2_address, max_hamming_distance=3):
    """Estimate how much of one text repeats sentences of a reference text.

    Both files are split into sentences on ". ". Each sentence is reduced to
    a 64-bit SimHash fingerprint built from its lower-cased word tokens
    (single-character tokens and the stop words listed in ``停用词.txt`` are
    dropped). A sentence of the checked text counts as repeated when its
    fingerprint differs from some reference sentence's fingerprint in at
    most ``max_hamming_distance`` bits.

    Args:
        text1_address: path of the reference text.
        text2_address: path of the text being checked.
        max_hamming_distance: largest bit difference still treated as a
            near-duplicate. The former hard-coded limit of 1000 could never
            be exceeded by a 64-bit distance, so every pair matched.

    Returns:
        float: words in repeated sentences divided by all words of the
        checked text (0.0 when the checked text has no words). The value is
        also printed as a percentage.
    """
    hashbits = 64
    word_pattern = re.compile(r'\w+')

    def get_stopwords():
        """Load the stop-word list, one entry per line."""
        with open("停用词.txt", "r", encoding="utf-8") as f:
            return {line.strip() for line in f}

    def text_split(text):
        """Read a file and return its sentences, split on '. ' and stripped."""
        with open(text, 'r') as f:
            content = f.read()
        return [s.strip() for s in content.split(". ")]

    def fingerprint_of(sentence):
        """Return the SimHash of a sentence, or None when no token survives."""
        tokens = [w for w in word_pattern.findall(sentence.lower())
                  if len(w) > 1 and w not in stopwords]
        if not tokens:
            # An empty token list would hash to all-ones and match every
            # other empty sentence, so such sentences are not compared.
            return None
        counters = [0] * hashbits
        for token in tokens:
            digest = int(hashlib.md5(token.encode('utf-8')).hexdigest(), 16)
            for bit in range(hashbits):
                counters[bit] += 1 if (digest >> bit) & 1 else -1
        return sum(1 << bit for bit, count in enumerate(counters) if count >= 0)

    def hamming_distance(left, right):
        """Count the bits in which two fingerprints differ."""
        return bin(left ^ right).count("1")

    # Read the stop words once instead of once per sentence pair.
    stopwords = get_stopwords()

    reference_prints = []
    for sentence in text_split(text1_address):
        reference = fingerprint_of(sentence)
        if reference is not None:
            reference_prints.append(reference)

    checked_sentences = text_split(text2_address)
    the_words_amount_of_text2 = sum(len(s.split()) for s in checked_sentences)

    amount_of_words_of_similar_sentence = 0
    for sentence in checked_sentences:
        candidate = fingerprint_of(sentence)
        if candidate is None:
            continue
        # Each checked sentence is counted at most once, however many
        # reference sentences it resembles.
        if any(hamming_distance(candidate, reference) <= max_hamming_distance
               for reference in reference_prints):
            amount_of_words_of_similar_sentence += len(sentence.split())

    if the_words_amount_of_text2:
        the_ratio_of_repetition = amount_of_words_of_similar_sentence / the_words_amount_of_text2
    else:
        the_ratio_of_repetition = 0.0

    print(f"您这篇文章的重复率为：{the_ratio_of_repetition:.2%}")
    return the_ratio_of_repetition


rating1_of_repetition = CALC_repetition("topic.txt",
                "text1.txt")
rating2_of_repetition = CALC_repetition("topic.txt",
                "text2.txt")

# Repetition of the modified essay relative to the original one; reported as
# 0.0 when the original essay repeats nothing, since the quotient is undefined.
if rating1_of_repetition:
    ratio_of_repetition = rating2_of_repetition / rating1_of_repetition
else:
    ratio_of_repetition = 0.0


"""
高频词汇统计与评分
"""
class Storage_level:
    """Grade the vocabulary of an essay against four word lists."""

    def __init__(self, text):
        """Read the essay at path *text* and tokenize it.

        ``self.text`` holds the raw content; ``self.words`` holds the
        whitespace-separated tokens, lower-cased and stripped of surrounding
        punctuation.
        """
        import string
        with open(text, 'r') as fp:
            self.text = fp.read()
        strip_chars = string.punctuation + string.whitespace
        self.words = [word.lower().strip(strip_chars) for word in self.text.split()]

    def word_level(self, excel, sheet):
        """Return the set of words in the first column of one workbook sheet.

        Commas are removed from each entry.
        """
        read = pds.read_excel(excel, sheet_name=sheet)
        return {str(row[0]).replace(',', '') for row in read.values.tolist()}

    def word_storage(self, storage, junior, high, universe, toefl):
        """Split the four word lists into four mutually exclusive levels.

        Returns:
            list[set]: ``[level1, level2, level3, level4]`` where each level
            contains only the words that appear in that list and in none of
            the other three.
        """
        words_junior = self.word_level(storage, junior)
        words_high = self.word_level(storage, high)
        words_universe = self.word_level(storage, universe)
        words_toefl = self.word_level(storage, toefl)
        # NOTE(review): a word present in two lists ends up in no level at
        # all and is therefore scored as an ordinary word - confirm intent.
        word_level1 = words_junior - (words_high | words_universe | words_toefl)
        word_level2 = words_high - words_junior - words_universe - words_toefl
        word_level3 = words_universe - words_high - words_toefl - words_junior
        word_level4 = words_toefl - words_junior - words_universe - words_high
        return [word_level1, word_level2, word_level3, word_level4]

    def analysis(self, storage, junior, high, universe, toefl):
        """Compute the share of ordinary and advanced words in the essay.

        Level 2, 3 and 4 words are "advanced" and worth 60, 80 and 100
        points respectively.

        Returns:
            list[float]: ``[ordinary share, advanced share, level-2 share,
            level-3 share, level-4 share, average points per advanced word]``.
            All shares are 0.0 for an empty essay, and the average is 0.0
            when the essay contains no advanced word.
        """
        # Build the level sets once; each build reads four workbook sheets.
        _, level2, level3, level4 = self.word_storage(storage, junior, high, universe, toefl)
        quantity_of_word = len(self.words)
        if quantity_of_word == 0:
            return [0.0, 0.0, 0.0, 0.0, 0.0, 0.0]

        tiers = ((level2, 60), (level3, 80), (level4, 100))
        tier_counts = [0, 0, 0]
        total_goals = 0
        for word in self.words:
            for index, (vocabulary, points) in enumerate(tiers):
                if word in vocabulary:
                    tier_counts[index] += 1
                    total_goals += points
                    break

        quantity_of_advanced_word = sum(tier_counts)
        advanced_word = quantity_of_advanced_word / quantity_of_word
        ordinary_word = 1 - advanced_word
        level2_word, level3_word, level4_word = (
            count / quantity_of_word for count in tier_counts
        )
        if quantity_of_advanced_word:
            average_advanced_score = total_goals / quantity_of_advanced_word
        else:
            average_advanced_score = 0.0
        return [ordinary_word, advanced_word, level2_word, level3_word, level4_word, average_advanced_score]


# 定义get_result函数，输入原文和修改后作文
def get_result(text_name1, text_name2):
    """Analyse the original and the modified essay and label the figures.

    Both files are graded against the 'junior', 'high', 'universe' and
    'toefl' sheets of ``词库.xlsx``.

    Returns:
        dict: the six figures of the original essay followed by the six
        figures of the modified essay, keyed by their display labels.
    """
    original_labels = (
        'Proportion of Common Words',
        'Proportion of Advanced Words',
        'Proportion of Level 2 Advanced Words',
        'Proportion of Level 3 Advanced Words',
        'Proportion of Level 4 Advanced Words',
        'Average Score of Advanced Words',
    )
    modified_labels = (
        'Proportion of Common Words in Modified Text',
        'Proportion of Advanced Words in Modified Text',
        'Proportion of Level 2 Advanced Words in Modified Text',
        'Proportion of Level 3 Advanced Words in Modified Text',
        'Proportion of Level 4 Advanced Words in Modified Text',
        'Average Score of Advanced Words in Modified Text',
    )
    workbook = "词库.xlsx"
    sheets = ('junior', 'high', 'universe', 'toefl')

    # Load both essays before analysing either of them.
    original_essay = Storage_level(text_name1)
    modified_essay = Storage_level(text_name2)
    original_figures = original_essay.analysis(workbook, *sheets)
    modified_figures = modified_essay.analysis(workbook, *sheets)

    result = {}
    for position, label in enumerate(original_labels):
        result[label] = original_figures[position]
    for position, label in enumerate(modified_labels):
        result[label] = modified_figures[position]
    return result


# Run the vocabulary analysis once; every figure below is derived from this
# single result instead of re-reading the essays and the workbook each time.
words_level = get_result('text1.txt', 'text2.txt')
selected_keys = ['Proportion of Common Words', 'Proportion of Level 2 Advanced Words', 'Proportion of Level 3 Advanced Words', 'Proportion of Level 4 Advanced Words', 'Proportion of Common Words in Modified Text', 'Proportion of Level 2 Advanced Words in Modified Text', 'Proportion of Level 3 Advanced Words in Modified Text', 'Proportion of Level 4 Advanced Words in Modified Text']

# All eight proportions, then the four of the original essay and the four of
# the modified essay (one dict per pie chart).
selected_words_level = {k: words_level[k] for k in selected_keys}
selected_words_level1 = {k: words_level[k] for k in selected_keys[:4]}
selected_words_level2 = {k: words_level[k] for k in selected_keys[-4:]}

# Ratio of the average advanced-word score, modified essay over original.
# Reported as 0.0 when the original score is zero, since the quotient is undefined.
_original_average_score = words_level['Average Score of Advanced Words']
if _original_average_score:
    ratio_of_words_level = words_level['Average Score of Advanced Words in Modified Text'] / _original_average_score
else:
    ratio_of_words_level = 0.0
# Show the full result
print(words_level)


"""
体检报告
"""
# 基本的语法修改
def correct_text(text):
    """Normalize punctuation and the spacing that follows it.

    Full-width Chinese punctuation is replaced by its ASCII counterpart, and
    a space is inserted after ``, . ! ? ; :`` when the next character is
    neither a space nor a newline. No space is inserted inside a run of
    punctuation ("...", "?!") or between two digits ("3.14", "10:30").

    Args:
        text: the raw essay text.

    Returns:
        str: the corrected text.
    """
    marks = ",.!?;:"
    text = text.translate(str.maketrans("，。！？；：", marks))

    # Build the result in one pass. The earlier in-place insertion iterated
    # over the original length, so punctuation pushed past that index by
    # inserted spaces was never examined.
    pieces = []
    last_index = len(text) - 1
    for index, char in enumerate(text):
        pieces.append(char)
        if char not in marks or index == last_index:
            continue
        following = text[index + 1]
        if following == " " or following == "\n" or following in marks:
            continue
        if (char in ".,:" and following.isdigit()
                and index > 0 and text[index - 1].isdigit()):
            continue
        pieces.append(" ")
    return "".join(pieces)

# Load both essays with punctuation and spacing normalized; the files are
# closed as soon as they have been read.
with open("text1.txt", "r") as _essay_file:
    text1 = correct_text(_essay_file.read())
with open("text2.txt", "r") as _essay_file:
    text2 = correct_text(_essay_file.read())

# 体检报告
def get_info(text):
    """Collect basic statistics about a text.

    NOTE: a later definition of ``get_info`` in this file replaces this one
    before it is ever called; that one returns the same figures as a list
    of single-element lists.

    Args:
        text: the text to analyse.

    Returns:
        tuple: ``(paragraphs, sentences, tokens, distinct tokens, mean token
        length, standard deviation of token length)``. Paragraphs are one
        plus the number of newlines, sentences are the number of ``. ? !``
        characters, and both length figures are 0.0 for a text without tokens.
    """
    num_paragraphs = 1 + text.count("\n")
    num_sentences = sum(text.count(mark) for mark in ".?!")

    # Tokens are whitespace-separated, lower-cased and stripped of punctuation.
    strip_chars = string.punctuation + string.whitespace
    word_list = [word.lower().strip(strip_chars) for word in text.split()]
    num_of_words = len(word_list)
    num_of_nonrep_words = len(set(word_list))

    if num_of_words:
        lengths = [len(word) for word in word_list]
        avg_length = sum(lengths) / num_of_words
        variance = sum((length - avg_length) ** 2 for length in lengths) / num_of_words
        # The standard deviation is the square root of the variance; the
        # variance itself used to be reported here.
        standard_diviation = variance ** 0.5
    else:
        avg_length = 0.0
        standard_diviation = 0.0

    return num_paragraphs, num_sentences, num_of_words, num_of_nonrep_words, avg_length, standard_diviation


"""
可视化输出
"""
# Sample four-entry mapping with equal shares.
# NOTE(review): not referenced by the code visible in this file.
my_dict = {"key1": 0.25, "key2": 0.25, "key3": 0.25, "key4": 0.25}
# Current working directory; the chart images are written to and read from it.
workDir = os.getcwd()


def zhifangtu(a_dict):
    """Draw a bar chart of a ``{word: score}`` mapping and save it.

    The chart is written to ``TF–IDF Word Frequency Chart 1`` in the working
    directory (matplotlib appends its default image extension).

    Args:
        a_dict: mapping of bar label to bar height.
    """
    import matplotlib.pyplot as plt
    import matplotlib.font_manager as fm
    plt.rcParams['font.sans-serif'] = ['Times New Roman']
    keys = list(a_dict.keys())
    values = list(a_dict.values())

    # Draw on a figure of its own so nothing left over from an earlier chart
    # ends up in this image.
    fig = plt.figure(figsize=(20, 30))
    plt.bar(keys, values, align='center', alpha=0.5, edgecolor='black', linewidth=1.2)
    for i, v in enumerate(values):
        # Anchor the label at the top of its bar; the former fixed offset of
        # 1 placed it far above bars whose height is a fraction of 1.
        plt.text(i, v, str(v), ha='center', va='bottom', fontweight='bold')

    title_font = {'fontsize': 50, 'fontweight': 'bold', 'fontstyle': 'italic', 'color': 'blue'}
    title_font1 = {'fontsize': 40}
    plt.title('TF–IDF Word Frequency Chart', fontdict=title_font)
    plt.xlabel('Words', fontdict=title_font1)
    plt.ylabel('TF–IDF Word Frequency', fontdict=title_font1)
    fontprop = fm.FontProperties(size=20)
    fontprop1 = fm.FontProperties(size=30)
    plt.xticks(rotation=45, ha='right', fontproperties=fontprop)
    plt.yticks(ha='right', fontproperties=fontprop1)

    plt.savefig('TF–IDF Word Frequency Chart 1')
    # Release the figure so repeated calls do not accumulate open figures.
    plt.close(fig)



def zhifangtu1(a_dict):
    """Draw a bar chart of a ``{word: score}`` mapping and save it.

    Identical to ``zhifangtu`` except for the output file, which is
    ``TF–IDF Word Frequency Chart 2`` in the working directory (matplotlib
    appends its default image extension).

    Args:
        a_dict: mapping of bar label to bar height.
    """
    import matplotlib.pyplot as plt
    import matplotlib.font_manager as fm
    plt.rcParams['font.sans-serif'] = ['Times New Roman']
    keys = list(a_dict.keys())
    values = list(a_dict.values())

    # Draw on a figure of its own so nothing left over from an earlier chart
    # ends up in this image.
    fig = plt.figure(figsize=(20, 30))
    plt.bar(keys, values, align='center', alpha=0.5, edgecolor='black', linewidth=1.2)
    for i, v in enumerate(values):
        # Anchor the label at the top of its bar; the former fixed offset of
        # 1 placed it far above bars whose height is a fraction of 1.
        plt.text(i, v, str(v), ha='center', va='bottom', fontweight='bold')

    title_font = {'fontsize': 50, 'fontweight': 'bold', 'fontstyle': 'italic', 'color': 'blue'}
    title_font1 = {'fontsize': 40}
    plt.title('TF–IDF Word Frequency Chart', fontdict=title_font)
    plt.xlabel('Words', fontdict=title_font1)
    plt.ylabel('TF–IDF Word Frequency', fontdict=title_font1)
    fontprop = fm.FontProperties(size=20)
    fontprop1 = fm.FontProperties(size=30)
    plt.xticks(rotation=45, ha='right', fontproperties=fontprop)
    plt.yticks(ha='right', fontproperties=fontprop1)

    plt.savefig('TF–IDF Word Frequency Chart 2')
    # Release the figure so repeated calls do not accumulate open figures.
    plt.close(fig)


# The three calls below sit inside a string literal, so they never run and
# the TF-IDF bar charts are not generated.
'''fic_of_TFIDF1 = zhifangtu(x[0])
fic_of_TFIDF2 = zhifangtu(x[1])
fic_of_TFIDF3 = zhifangtu(x[2])'''


def bing(b):
    """Draw a pie chart of the first four entries of *b* and save it.

    The image is written to ``Chart_of_Advanced_Words1.png`` in ``workDir``.

    Args:
        b: mapping of slice label to slice size; only the first four
           entries are drawn.
    """
    import matplotlib.pyplot as plt
    plt.rcParams['font.sans-serif'] = ['Times New Roman']
    keys = list(b.keys())[:4]
    values = list(b.values())[:4]
    colors = ['pink', 'red', 'red', 'purple', 'blue']
    alpha = [0.3, 0.3, 0.9, 0.8]
    title_font1 = {'fontsize': 15, 'color': 'black'}

    # Draw on a figure of its own so charts do not pile up on one canvas.
    fig = plt.figure(figsize=(5, 5))
    plt.title('Chart of Advanced Words1', fontdict=title_font1)
    wedges = plt.pie(values, labels=keys, autopct='%1.1f%%', startangle=90, counterclock=False)[0]
    for wedge, color, opacity in zip(wedges, colors, alpha):
        wedge.set_color(color)
        wedge.set_alpha(opacity)
    plt.legend(prop=fm.FontProperties(size=16))

    # Save once, after all styling is applied (the file used to be rewritten
    # for every wedge, before the legend and the size were set).
    plt.savefig(os.path.join(workDir, 'Chart_of_Advanced_Words1.png'))
    plt.close(fig)

bing(selected_words_level1)


def bing1(b):
    """Draw a pie chart of the first four entries of *b* and save it.

    The image is written to ``Chart_of_Advanced_Words2.png`` in ``workDir``.

    Args:
        b: mapping of slice label to slice size; only the first four
           entries are drawn.
    """
    import matplotlib.pyplot as plt
    plt.rcParams['font.sans-serif'] = ['Times New Roman']
    keys = list(b.keys())[:4]
    values = list(b.values())[:4]
    colors = ['pink', 'red', 'red', 'purple', 'blue']
    alpha = [0.3, 0.3, 0.9, 0.8]
    title_font1 = {'fontsize': 15, 'color': 'black'}

    # Draw on a figure of its own; this also defines the figure handle that
    # the sizing step previously referenced without ever creating it.
    fig = plt.figure(figsize=(5, 5))
    plt.title('Chart of Advanced Words2', fontdict=title_font1)
    wedges = plt.pie(values, labels=keys, autopct='%1.1f%%', startangle=90, counterclock=False)[0]
    for wedge, color, opacity in zip(wedges, colors, alpha):
        wedge.set_color(color)
        wedge.set_alpha(opacity)
    plt.legend(prop=fm.FontProperties(size=16))

    # Save once, after all styling is applied.
    plt.savefig(os.path.join(workDir, 'Chart_of_Advanced_Words2.png'))
    plt.close(fig)

# The modified essay's chart must come from bing1; calling bing here redrew
# and overwrote the first chart instead.
bing1(selected_words_level2)






def get_info(text):
    """Collect basic statistics about a text, shaped as table rows.

    Args:
        text: the text to analyse.

    Returns:
        list[list]: six single-element rows, in order: paragraphs (one plus
        the number of newlines), sentences (number of ``. ? !`` characters),
        tokens, distinct tokens, mean token length and standard deviation of
        token length. The last two are rounded to two decimals and are 0.0
        for a text without tokens.
    """
    num_paragraphs = 1 + text.count("\n")
    num_sentences = sum(text.count(mark) for mark in ".?!")

    # Tokens are whitespace-separated, lower-cased and stripped of punctuation.
    strip_chars = string.punctuation + string.whitespace
    word_list = [word.lower().strip(strip_chars) for word in text.split()]
    num_of_words = len(word_list)
    num_of_nonrep_words = len(set(word_list))

    if num_of_words:
        lengths = [len(word) for word in word_list]
        avg_length = sum(lengths) / num_of_words
        variance = sum((length - avg_length) ** 2 for length in lengths) / num_of_words
        # The standard deviation is the square root of the variance; the
        # variance itself used to be reported here.
        standard_diviation = variance ** 0.5
    else:
        avg_length = 0.0
        standard_diviation = 0.0

    return [[num_paragraphs], [num_sentences], [num_of_words], [num_of_nonrep_words], [round(avg_length, 2)],
            [round(standard_diviation, 2)]]


def blocki(f):
    """Render the text statistics as a one-column table image.

    The image is written to ``ff.png`` in ``workDir``.

    Args:
        f: six single-element rows, one per row label, as returned by
           ``get_info``.
    """
    import matplotlib.pyplot as plt
    headers = ['Result']
    # Draw on a figure of its own; otherwise the table is painted over
    # whatever chart was drawn last.
    fig = plt.figure()
    plt.table(cellText=f, colLabels=headers,
              rowLabels=['Paragraph Num', 'Sentence Num', 'Word Num', 'Nonrep word Num', 'Average word Length',
                         'Word Standard Divation'], colWidths=[0.2], cellLoc='center', loc='center')
    plt.axis('off')
    plt.savefig(os.path.join(workDir, 'ff.png'))
    plt.close(fig)


blocki(get_info(text1))



def calculate_time(func):
    """Decorator that prints how long each call of *func* takes.

    The wrapped function's return value is passed through unchanged and its
    name and docstring are preserved. Nothing is printed when *func* raises.

    Args:
        func: the callable to time.

    Returns:
        The timing wrapper around *func*.
    """
    import functools

    @functools.wraps(func)
    def wrapper(*args, **kwargs):
        # perf_counter is monotonic, so the difference cannot go negative
        # when the system clock is adjusted during the call.
        start_time = time.perf_counter()
        result = func(*args, **kwargs)
        end_time = time.perf_counter()
        print(f"程序运行时间为 {end_time - start_time} 秒")
        return result
    return wrapper


@calculate_time
def my_function():
    # Placeholder: put the code to be timed here.
    pass

my_function() # the decorator prints the elapsed run time in seconds



from tkinter import *
from tkinter import scrolledtext

from PIL import Image, ImageTk

# Source images for the score view.
Img1Open = Image.open(os.path.join(workDir, 'Chart_of_Advanced_Words1.png'))
# The second slot shows the modified essay's pie chart; fall back to the
# first chart when that file has not been written.
_chart2Path = os.path.join(workDir, 'Chart_of_Advanced_Words2.png')
if not os.path.exists(_chart2Path):
    _chart2Path = os.path.join(workDir, 'Chart_of_Advanced_Words1.png')
Img2Open = Image.open(_chart2Path)
# NOTE(review): slots 3-5 sit under the keyword-distribution label but still
# load the first pie chart; the TF-IDF bar charts are never generated here.
Img3Open = Image.open(os.path.join(workDir, 'Chart_of_Advanced_Words1.png'))
Img4Open = Image.open(os.path.join(workDir, 'Chart_of_Advanced_Words1.png'))
Img5Open = Image.open(os.path.join(workDir, 'Chart_of_Advanced_Words1.png'))
# Report table image, kept under its own name because checkReportPng is
# reset to None below.
checkReportOpen = Image.open(os.path.join(workDir, 'ff.png'))



windowTitle = "作文评分系统"
# Texts shown in the left and middle panes
windowPrompt1 = str.format("文本1：{}", text1)
windowPrompt2 = str.format("文本2（已为您纠正基本的标点与空格错误）：{}", text2)
similarityDecPercent = str(ratio_of_repetition * 100) + "%"
wordLevelIncPercent = str(ratio_of_words_level * 100) + "%"
# NOTE(review): no theme-fit figure is available at module level, so this
# label suffix stays empty.
themeFitIncPercent = ""
# The requirement pane shows the topic, which lives in topic.txt (text1.txt
# is the first essay and was shown here by mistake).
with open("topic.txt", "r") as _topicFile:
    requirementText = "题目：\n" + _topicFile.read()


# PhotoImage objects are created lazily by the view and stored in these
# module-level names so that they are not garbage-collected while displayed.
img1Png = None
img2Png = None
img3Png = None
img4Png = None
img5Png = None
checkReportPng = None

# Window parameters

# Window width in pixels
windowWidth = 1200
# Window height in pixels
windowHeight = 600
# Sub-window width as a fraction of its parent
subWindowWidth = 0.3
# Sub-window height as a fraction of its parent
subWindowHeight = 0.8
# Button width as a fraction of its parent
buttonWidth = 0.25
# Button height as a fraction of its parent
buttonHeight = 0.2

# 控件类

class mainWindow(Frame):
    """Main view: two essay panes plus a right-hand pane with three reports."""

    def __init__(self, master = None):
        """Build the whole widget tree inside *master*."""
        super().__init__(master)
        # Attach to the window this view was given rather than to the
        # module-level root, so the class works with any parent.
        self.rootFrame = Frame(self.master, width = windowWidth, height = windowHeight)
        self.rootFrame.pack(side = "top", fill = "both", expand = 1)
        self.createMainWindow(self.rootFrame)

    def createMainWindow(self, mainFrame):
        """Create the left, middle and right panes and the quit button."""
        # Left pane: original essay
        mainFrame.leftSubWindow = scrolledtext.ScrolledText(mainFrame)
        mainFrame.leftSubWindow.insert(END,windowPrompt1)
        mainFrame.leftSubWindow.place(relheight = subWindowHeight, relwidth = subWindowWidth, relx = 0.1, rely = 0.05)
        # Middle pane: modified essay
        mainFrame.midSubWindow = scrolledtext.ScrolledText(mainFrame)
        mainFrame.midSubWindow.insert(END,windowPrompt2)
        mainFrame.midSubWindow.place(relheight = subWindowHeight, relwidth = subWindowWidth, relx = 0.4, rely = 0.05)
        # Right pane: report buttons and display area
        mainFrame.rightSubWindow = Frame(mainFrame)
        self.createRightWindow(mainFrame.rightSubWindow)
        mainFrame.rightSubWindow.place(relheight = subWindowHeight, relwidth = subWindowWidth, relx = 0.7, rely = 0.05)
        # Quit button closes the parent window
        mainFrame.quit = Button(mainFrame, text = "退出", bg = "red", command = self.master.destroy)
        mainFrame.quit.place(anchor = S, relheight = 0.05, relwidth = 0.2, relx = 0.5, rely = 0.95)

    def createRightWindow(self, rightWindow):
        """Create the display area and the three buttons that fill it."""
        # Display area shared by the three reports
        rightWindow.showWindow = Frame(rightWindow)
        rightWindow.showWindow.place(relheight = 0.7, relwidth = 0.8, relx = 0.1, rely = 0.25)
        # Score comparison button
        rightWindow.scoreCompare = Button(rightWindow, text = "评分比较", command = lambda : self.showScore(rightWindow.showWindow))
        rightWindow.scoreCompare.place(relheight = buttonHeight, relwidth = buttonWidth, relx = 0.1, rely = 0)
        # Check report button
        rightWindow.checkReport = Button(rightWindow, text = "检测报告", command = lambda : self.showCheckReport(rightWindow.showWindow))
        rightWindow.checkReport.place(relheight = buttonHeight, relwidth = buttonWidth, relx = 0.4, rely = 0)
        # Requirement button
        rightWindow.requirement = Button(rightWindow, text = "题目要求", command = lambda : self.showRequirement(rightWindow.showWindow))
        rightWindow.requirement.place(relheight = buttonHeight, relwidth = buttonWidth, relx = 0.7, rely = 0)

    def showScore(self, subWindow):
        """Replace the display area content with the score comparison."""
        # Remove whatever report is currently shown
        for widget in subWindow.winfo_children() :
            widget.destroy()
        # Split the display area into an upper and a lower half
        subWindow.rightUpSubWindow = Frame(subWindow)
        subWindow.rightUpSubWindow.place(relheight = 0.5, relwidth = 1, relx = 0, rely = 0)
        subWindow.rightDownSubWindow = Frame(subWindow)
        subWindow.rightDownSubWindow.place(relheight = 0.5, relwidth = 1, relx = 0, rely = 0.5)
        # The PhotoImage objects are stored in module-level names so they
        # stay referenced for as long as the labels display them.
        global img1Png
        img1Png = ImageTk.PhotoImage(Img1Open,master = subWindow.rightUpSubWindow)
        global img2Png
        img2Png = ImageTk.PhotoImage(Img2Open,master = subWindow.rightUpSubWindow)
        global img3Png
        img3Png = ImageTk.PhotoImage(Img3Open,  master = subWindow.rightDownSubWindow)
        global img4Png
        img4Png = ImageTk.PhotoImage(Img4Open,  master = subWindow.rightDownSubWindow)
        global img5Png
        img5Png = ImageTk.PhotoImage(Img5Open,  master = subWindow.rightDownSubWindow)
        # Similarity figure
        subWindow.rightUpSubWindow.similarityDec = Label(subWindow.rightUpSubWindow, text = "您的文章相似度降低了:" + similarityDecPercent)
        subWindow.rightUpSubWindow.similarityDec.place(relheight = 0.1, relwidth = 0.9, relx = 0.05, rely = 0.05)
        # Vocabulary level figure
        subWindow.rightUpSubWindow.wordLevelInc = Label(subWindow.rightUpSubWindow, text = "您的文章词汇高级度提升了:" + wordLevelIncPercent)
        subWindow.rightUpSubWindow.wordLevelInc.place(relheight = 0.1, relwidth = 0.9, relx = 0.05, rely = 0.2)
        # Figure 1
        subWindow.rightUpSubWindow.fig1 = Label(subWindow.rightUpSubWindow, image = img1Png)
        subWindow.rightUpSubWindow.fig1.place(relheight = 0.4, relwidth = 0.4, relx = 0.05, rely = 0.5)
        # Figure 2
        subWindow.rightUpSubWindow.fig2 = Label(subWindow.rightUpSubWindow, image = img2Png)
        subWindow.rightUpSubWindow.fig2.place(relheight = 0.4, relwidth = 0.4, relx = 0.55, rely = 0.5)
        # Theme fit figure
        subWindow.rightDownSubWindow.themeFit = Label(subWindow.rightDownSubWindow, text = "您的文章主题契合度提升了:" + themeFitIncPercent)
        subWindow.rightDownSubWindow.themeFit.place(relheight = 0.1, relwidth = 0.9, relx = 0.05, rely = 0.05)
        # Keyword distribution caption
        subWindow.rightDownSubWindow.keyword = Label(subWindow.rightDownSubWindow, text = "具体关键词分布如下:")
        subWindow.rightDownSubWindow.keyword.place(relheight = 0.1, relwidth = 0.9, relx = 0.05, rely = 0.2)
        # Figure 3
        subWindow.rightDownSubWindow.fig3 = Label(subWindow.rightDownSubWindow, image = img3Png)
        subWindow.rightDownSubWindow.fig3.place(relheight = 0.4, relwidth = 0.3, relx = 0, rely = 0.5)
        # Figure 4
        subWindow.rightDownSubWindow.fig4 = Label(subWindow.rightDownSubWindow, image = img4Png)
        subWindow.rightDownSubWindow.fig4.place(relheight = 0.4, relwidth = 0.3, relx = 0.35, rely = 0.5)
        # Figure 5
        subWindow.rightDownSubWindow.fig5 = Label(subWindow.rightDownSubWindow, image = img5Png)
        subWindow.rightDownSubWindow.fig5.place(relheight = 0.4, relwidth = 0.3, relx = 0.7, rely = 0.5)

    def showCheckReport(self, subWindow) :
        """Replace the display area content with the statistics table image."""
        # Remove whatever report is currently shown
        for widget in subWindow.winfo_children() :
            widget.destroy()
        # Load the report table (ff.png); the first pie chart used to be
        # shown here instead. PhotoImage copies the pixel data, so the file
        # can be closed right after construction.
        global checkReportPng
        with Image.open(os.path.join(workDir, 'ff.png')) as reportImage:
            checkReportPng = ImageTk.PhotoImage(reportImage,master = subWindow)
        # Show the report
        subWindow.checkReport = Label(subWindow, image = checkReportPng)
        subWindow.checkReport.place(relheight = 0.9, relwidth = 0.9, relx = 0.05, rely = 0.05)


    def showRequirement(self, subWindow) :
        """Replace the display area content with the requirement text."""
        # Remove whatever report is currently shown
        for widget in subWindow.winfo_children() :
            widget.destroy()
        # Show the requirement text in a scrollable box
        subWindow.requirementTextBox = scrolledtext.ScrolledText(subWindow)
        subWindow.requirementTextBox.insert(END,requirementText)
        subWindow.requirementTextBox.place(relheight = 0.9, relwidth = 0.9, relx = 0.05, rely = 0.05)


# Create the Tk root window, build the main view inside it and run the event loop.
root = Tk()
root.title(windowTitle)
window1 = mainWindow(master = root)
window1.mainloop()