课设要求
项目构成
实现思路
下述方案针对的是英文文章
- 爬取两个页面的 Html 源代码
- 利用正则表达式从 Html 源代码中提取出文章的正文部分
- 分词
(1) 把正文中的标点符号都替换为空格 (只留下英文字母和空格),把文章字符都转为小写字母(这样在后续计算内容相似度时可以把my My归为一类,提高结果准确度)
(2) 借助上一步留下的空格分隔,使用正则表达式 [a-zA-Z]{1,20} 将每篇文章的单词逐个匹配出来,形成两个数组 A 和 B(都不需要进行去重),A 和 B 分别保存文章 1 的所有单词和文章 2 的所有单词.
(3) 用一个链表将 A 和 B 的内容串起来,并对链表进行去重(此时链表保存的内容就是 A∪B),去重后链表的长度就是计算余弦相似度时两篇文章对应的向量的维数
(4) 给两篇文章都各自分配一个*步骤(3)*生成的链表,为生成各自的tf和cos向量做准备
(5) 得出两篇文章各自对应的向量 a,向量 b,两向量维数相同,不存在的分量记为 0
(6) 计算 a,b 的余弦相似度
方案示例
程序运行
如果两篇文章相同 那么余弦相似度就是1
思考
项目名取得不够准确(取名的时候没注意),叫作 tfcos 更合适
还可以改进的地方:
比如 tree-trees怎么才能归为一项,
live-lived怎么归为一项
try-tried怎么归为一项
源代码
testTFIDF.pro
#-------------------------------------------------
#
# Project created by QtCreator 2020-04-08T15:32:18
#
#-------------------------------------------------
# qmake project file: lists the Qt modules, build target, sources, headers,
# Designer form and resource file that make up the application.

# Qt modules the application is built against.
# NOTE(review): the second line repeats core/gui; `network` is the only module it adds.
QT += core gui
QT += core gui network
# Adds the widgets module when building against a Qt major version above 4.
greaterThan(QT_MAJOR_VERSION, 4): QT += widgets
# Name of the produced binary and the project template (an application).
TARGET = untitled4
TEMPLATE = app
# The following define makes your compiler emit warnings if you use
# any feature of Qt which as been marked as deprecated (the exact warnings
# depend on your compiler). Please consult the documentation of the
# deprecated API in order to know how to port your code away from it.
DEFINES += QT_DEPRECATED_WARNINGS
# You can also make your code fail to compile if you use deprecated APIs.
# In order to do so, uncomment the following line.
# You can also select to disable deprecated APIs only up to a certain version of Qt.
#DEFINES += QT_DISABLE_DEPRECATED_BEFORE=0x060000 # disables all the APIs deprecated before Qt 6.0.0
# Translation units compiled into the application.
SOURCES += \
main.cpp \
mainwindow.cpp \
robotsandre.cpp \
tfwithcos.cpp
# Headers belonging to the project.
HEADERS += \
mainwindow.h \
robotsandre.h \
tfwithcos.h
# Qt Designer form for the main window.
FORMS += \
mainwindow.ui
# Resource collection file (referenced by the form for the window icon).
RESOURCES += \
tupian.qrc
**mainwindow.h**
```cpp
#ifndef MAINWINDOW_H
#define MAINWINDOW_H
#include <QMainWindow>
#include "tfwithcos.h"
#include "ui_mainwindow.h"
#include "robotsandre.h"
namespace Ui {
class MainWindow;
}
/**
 * Main window of the page-similarity tool.
 *
 * Besides the UI, the window stores the intermediate results of one run:
 * the two tokenised articles, one word list per article and the two
 * term-frequency vectors that are fed into the cosine computation.
 *
 * All result members carry default initialisers so that they hold a
 * well-defined value (null / zero) before the first run has produced data.
 */
class MainWindow : public QMainWindow
{
    Q_OBJECT

public:
    explicit MainWindow(QWidget *parent = nullptr);

    // Crawls the two pages named by the URL inputs and replaces the special
    // characters of the extracted article text with spaces.
    bool GetTwopages();

    // Intended to pull every word out of the two space-separated articles
    // with a regular expression.
    // NOTE(review): no definition is visible in the sources shown here -
    // confirm one exists before calling it.
    bool TFwithCOS();

    QString *TEXToneArray = nullptr;  // words of article one after tokenisation
    QString *TEXTtwoArray = nullptr;  // words of article two after tokenisation
    int TextOneWordNum = 0;           // number of words in article one
    int TextTwoWordNum = 0;           // number of words in article two
    Node *LinkOfpageOne = nullptr;    // word list carrying the counts of article one
    Node *LinkOfpageTwo = nullptr;    // word list carrying the counts of article two
    int LinklistLength = 0;           // node count of the word lists
    double *cosoneArray = nullptr;    // term-frequency vector of article one
    double *costwoArray = nullptr;    // term-frequency vector of article two

    ~MainWindow();

private slots:
    // Auto-connected slot for the "Start" button (btn_Start).
    void on_btn_Start_clicked();

private:
    Ui::MainWindow *ui;
};
#endif // MAINWINDOW_H
```
**robotsandre.h**
```cpp
// Declarations of the crawler and the regular-expression extraction helper.
// The include guard wraps the whole header (the original closed it right
// after the #define, so it guarded nothing).
#ifndef ROBOTSANDRE_H
#define ROBOTSANDRE_H

#include <QCoreApplication>
#include <QRegularExpression>
#include <QRegularExpressionMatch>
#include <QRegularExpressionMatchIterator>
#include <QString>
#include <QDebug>
#include <QtCore>
#include <QNetworkAccessManager>
#include <QUrl>
#include <QNetworkRequest>
#include <QNetworkReply>
#include <QObject>

// Downloads the page at `url` and returns its body as text.
QString Robots(QString url);

// Applies the pattern `re` to `HTML` and returns the text captured by the
// named group "zw" of every match, joined with newlines.
QString RegularExpression(QString HTML,QString re);

#endif // ROBOTSANDRE_H
```
robotsandre.h
// Declarations of the crawler and the regular-expression extraction helper.
// The include guard wraps the whole header (the original closed it right
// after the #define, so it guarded nothing).
#ifndef ROBOTSANDRE_H
#define ROBOTSANDRE_H

#include <QCoreApplication>
#include <QRegularExpression>
#include <QRegularExpressionMatch>
#include <QRegularExpressionMatchIterator>
#include <QString>
#include <QDebug>
#include <QtCore>
#include <QNetworkAccessManager>
#include <QUrl>
#include <QNetworkRequest>
#include <QNetworkReply>
#include <QObject>

// Downloads the page at `url` and returns its body as text.
QString Robots(QString url);

// Applies the pattern `re` to `HTML` and returns the text captured by the
// named group "zw" of every match, joined with newlines.
QString RegularExpression(QString HTML,QString re);

#endif // ROBOTSANDRE_H
tfwithcos.h
// Word-list data structure plus the term-frequency / cosine-similarity API.
// The include guard now encloses the whole header; previously it was closed
// right after the #define, so including the header twice in one translation
// unit redefined `struct Node`.
#ifndef TFWITHCOS_H
#define TFWITHCOS_H

#include <QCoreApplication>
#include <QRegularExpression>
#include <QRegularExpressionMatch>
#include <QRegularExpressionMatchIterator>
#include <QString>
#include <QDebug>
#include <QtCore>
#include <QUrl>
#include <QObject>

// One entry of the singly linked word list: a word, its occurrence count and
// the link to the next entry.
struct Node
{
    QString _word;
    float _num;
    Node *_next;

    Node() : _num(0), _next(nullptr)
    {
    }

    Node(QString word) : _word(word), _num(0), _next(nullptr)
    {
    }
};

// Tokenises the two articles into two newly allocated word arrays and reports
// the number of words in each.
void RaiseTwoArray(QString * &TEXToneArray,QString * &TEXTtwoArray,int &TextOneWordNum,int &TextTwoWordNum,QString TEXTone,QString TEXTtwo);

// Appends a node holding `word` to the end of the list.
void AddAtTail(Node * Head,QString word);

// Prints every node of the list through qDebug.
void ShowLinkList(Node* Head);

// Merges the two word arrays into one linked list, dropping duplicates while
// merging; `LinkListLength` receives the number of nodes of the result.
Node* CombineTwoArrayToALinkList(QString * TEXToneArray,QString * TEXTtwoArray,int TextOneWordNum,int TextTwoWordNum,int &LinkListLength);

// Counts, inside the list, the occurrences of every word of `TEXTArray`.
void fillLinkList(Node *LinkListOfpage,QString *TEXTArray,int TextWordNum);

// Builds the textual term-frequency report of one article.
QString showtf(Node *Head,int Length);

// Cosine similarity of two vectors with `N` components each.
double cal(int N,double *oneArray,double *twoArray);

#endif // TFWITHCOS_H
main.cpp
#include "mainwindow.h"
#include <QApplication>
int main(int argc, char *argv[])
{
QApplication a(argc, argv);
MainWindow w;
w.show();
return a.exec();
}
mainwindow.cpp
#include "mainwindow.h"
MainWindow::MainWindow(QWidget *parent) :
QMainWindow(parent),
ui(new Ui::MainWindow)
{
ui->setupUi(this);
}
MainWindow::~MainWindow()
{
delete [] this->TEXToneArray;
delete [] this->TEXTtwoArray;
delete [] this->cosoneArray;
delete [] this->costwoArray;
Node* q=this->LinkOfpageOne->_next;
while(q!=nullptr)
{
this->LinkOfpageOne->_next=q->_next;
delete q;
q=this->LinkOfpageOne->_next;
}delete this->LinkOfpageOne;
Node* q2=this->LinkOfpageTwo->_next;
while(q2!=nullptr)
{
this->LinkOfpageTwo->_next=q2->_next;
delete q2;
q2=this->LinkOfpageTwo->_next;
}delete this->LinkOfpageTwo;
delete ui;
}
void MainWindow::on_btn_Start_clicked()
{
//爬虫部分
GetTwopages();//根据输入的两个url爬虫获取两个页面,并且把特殊符号全部换成空格串
//计算TF的部分
QString TEXTone=ui->plainTxt_result->toPlainText();
QString TEXTtwo=ui->plainTxt_result_2->toPlainText();
//把两篇文章分别生成两个数组,就分词就好
RaiseTwoArray(this->TEXToneArray,this->TEXTtwoArray,this->TextOneWordNum,this->TextTwoWordNum,TEXTone,TEXTtwo);
//转换为小写,这样可以把类似It,it,My,my统一。
for(int i=0;i<this->TextOneWordNum;i++)
{
TEXToneArray[i]=TEXToneArray[i].toLower();
}
for(int i=0;i<this->TextTwoWordNum;i++)
{
TEXTtwoArray[i]=TEXTtwoArray[i].toLower();
}
//生成两篇文章的单词的并的链表(就是两篇文章单词汇总并且链表去重),然后按照各自的情况填充数组
this->LinklistLength=0;
LinkOfpageOne=CombineTwoArrayToALinkList(this->TEXToneArray,this->TEXTtwoArray,this->TextOneWordNum,this->TextTwoWordNum,this->LinklistLength);
LinkOfpageTwo=CombineTwoArrayToALinkList(TEXToneArray,this->TEXTtwoArray,this->TextOneWordNum,this->TextTwoWordNum,this->LinklistLength);
qDebug()<<"公共链表的长度:"<<this->LinklistLength;
// ShowLinkList(LinkOfpageTwo);//one,two都是一样的
//分别在公共链表中统计两篇文章的词的情况
// qDebug()<<"统计后各自的情况:";
fillLinkList(LinkOfpageOne,this->TEXToneArray,this->TextOneWordNum);
fillLinkList(LinkOfpageTwo,this->TEXTtwoArray,this->TextTwoWordNum);
// ShowLinkList(LinkOfpageOne);
// ShowLinkList(LinkOfpageTwo);
QString TextOnetf=showtf(LinkOfpageOne,this->TextOneWordNum);//生成一篇文章tf的结果
QString TextTwotf=showtf(LinkOfpageTwo,this->TextTwoWordNum);//生成一篇文章tf的结果
ui->plainTxt_tf1->setPlainText(TextOnetf);
ui->plainTxt_tf2->setPlainText(TextTwotf);
//构造和公共链表一样长的两个数组,生成cos向量
this->cosoneArray=new double[this->LinklistLength];
this->costwoArray=new double[this->LinklistLength];
int index=0;
Node *p=this->LinkOfpageOne;
while(p!=nullptr)
{
cosoneArray[index]=p->_num/this->TextOneWordNum;
index++;
p=p->_next;
}
index=0;
p=this->LinkOfpageTwo;
while(p!=nullptr)
{
costwoArray[index]=p->_num/this->TextTwoWordNum;
index++;
p=p->_next;
}
for(int i=0;i<LinklistLength;i++)
{
qDebug()<<cosoneArray[i];
}
for(int i=0;i<LinklistLength;i++)
{
qDebug()<<costwoArray[i];
}
double coskey=cal(this->LinklistLength,cosoneArray,costwoArray);//计算余弦相似度
qDebug()<<"余弦相似度:"<<coskey;
ui->lineEdit_coskey->setText(" "+QString::number(coskey));
}
bool MainWindow::GetTwopages()//根据输入的两个url爬虫获取两个页面,并且把特殊符号全部换成空格串
{
QString URLSTR;
QString URLSTR2;
URLSTR=ui->lnt_url->text();
URLSTR2=ui->lnt_url_2->text();
//爬虫部分ok了
ui->progressBar->setValue(20);
QString HTML=Robots(URLSTR);//爬虫的函数
QString HTML2=Robots(URLSTR2);//爬虫的函数
ui->plaintxt_rawHtml->setPlainText(HTML);//填充GUI
ui->plaintxt_rawHtml_2->setPlainText(HTML2);//填充GUI
//正则部分匹配
QString re="<div id=\"dede_content\">(?<zw>.*)<div class=\"dede_pages\">";
ui->progressBar->setValue(50);
QString tempkeyTEXT=RegularExpression(HTML,re);//一轮正则
QString tempkeyTEXT2=RegularExpression(HTML2,re);//一轮正则
QString re1="<div>(?<zw>.*?)</div>";
ui->progressBar->setValue(75);
QString keyTEXT=RegularExpression(tempkeyTEXT,re1);//二轮正则
QString keyTEXT2=RegularExpression(tempkeyTEXT2,re1);//二轮正则
//替换结果里的奇奇怪怪的编码
keyTEXT=keyTEXT.replace("'","\'");//把'的编码替换成单引号
keyTEXT=keyTEXT.replace(" " , " ");//把 的编码替换成空
keyTEXT=keyTEXT.replace("""," ");//把'的编码替换成单引号
keyTEXT=keyTEXT.replace("\'re"," are");
keyTEXT=keyTEXT.replace("\'m"," am");
keyTEXT=keyTEXT.replace("\'s"," ");
keyTEXT=keyTEXT.replace("\n"," ");
keyTEXT=keyTEXT.replace(" "," ");//把长串空格换成一个
keyTEXT=keyTEXT.replace(" "," ");//把长串空格换成一个
keyTEXT=keyTEXT.replace(" "," ");//把长串空格换成一个
keyTEXT=keyTEXT.replace(" "," ");//把长串空格换成一个
keyTEXT=keyTEXT.replace(" "," ");//把长串空格换成一个
keyTEXT=keyTEXT.replace(" "," ");//把长串空格换成一个
for(int i=0;i<keyTEXT.size();i++)
{
if(('a'<=keyTEXT[i]&&keyTEXT[i]<='z') || ('A'<=keyTEXT[i]&&keyTEXT[i]<='Z'))
continue;
else
{
if('\''==keyTEXT[i])
continue;
else
{
qDebug()<<"有"<<keyTEXT[i];
keyTEXT[i]=' ';
}
}
}
keyTEXT2=keyTEXT2.replace("'","\'");//把'的编码替换成单引号
keyTEXT2=keyTEXT2.replace(" " , " ");//把 的编码替换成空
keyTEXT2=keyTEXT2.replace("""," ");//把'的编码替换成单引号
keyTEXT2=keyTEXT2.replace("\'re"," are");
keyTEXT2=keyTEXT2.replace("\'m"," am");
keyTEXT2=keyTEXT2.replace("\'s"," ");
keyTEXT2=keyTEXT2.replace(" "," ");//把长串空格换成一个
keyTEXT2=keyTEXT2.replace(" "," ");//把长串空格换成一个
keyTEXT2=keyTEXT2.replace(" "," ");//把长串空格换成一个
keyTEXT2=keyTEXT2.replace(" "," ");//把长串空格换成一个
keyTEXT2=keyTEXT2.replace(" "," ");//把长串空格换成一个
keyTEXT2=keyTEXT2.replace(" "," ");//把长串空格换成一个
for(int i=0;i<keyTEXT2.size();i++)
{
if(('a'<=keyTEXT2[i]&&keyTEXT2[i]<='z') || ('A'<=keyTEXT2[i]&&keyTEXT2[i]<='Z'))
continue;
else
{
if('\''==keyTEXT2[i])
continue;
else
{
qDebug()<<"有"<<keyTEXT2[i];
keyTEXT2[i]=' ';
}
}
}
ui->plainTxt_result->setPlainText(keyTEXT);
ui->plainTxt_result_2->setPlainText(keyTEXT2);
ui->progressBar->setValue(100);
}
robotsandre.cpp
#include "robotsandre.h"
// Fetches the page at `url` with a GET request, blocking in a local event
// loop until the reply reports that it has finished, and returns the body
// that was received as a string.
QString Robots(QString url)
{
    const QUrl target=url;
    QNetworkAccessManager accessManager;
    QEventLoop waitLoop;

    const QNetworkRequest request(target);
    QNetworkReply *response=accessManager.get(request);

    // Leave the local event loop as soon as the transfer is done.
    QObject::connect(response,SIGNAL(finished()),&waitLoop,SLOT(quit()));
    waitLoop.exec();

    const QByteArray body=response->readAll();
    return QString(body);
}
// Extracts the article text from a crawled page.
//
// `re` is matched globally against `HTML`; the pattern must contain a named
// capture group "zw". The result starts with a single space, followed by the
// capture of the first match; the capture of every further match is appended
// on a new line. A capture that consists only of "\r\n\t " contributes just
// the line break.
//
// When nothing matches, the single space is returned. The original called
// next() on an exhausted iterator in that case, which Qt documents as
// leading to undefined results.
QString RegularExpression(QString HTML,QString re)
{
    QString TextAfterRe=" ";

    // DotMatchesEverythingOption lets '.' match line breaks as well; without
    // it the multi-line article body would not match at all.
    QRegularExpression Re(re,QRegularExpression::DotMatchesEverythingOption);
    QRegularExpressionMatchIterator Matchs=Re.globalMatch(HTML);
    if(!Matchs.hasNext())
    {
        return TextAfterRe;
    }

    // The first capture is appended directly, without a leading line break.
    TextAfterRe=TextAfterRe+Matchs.next().captured("zw");

    while(Matchs.hasNext())
    {
        const QString temp=Matchs.next().captured("zw");
        if(temp=="\r\n\t ")
        {
            TextAfterRe=TextAfterRe+"\n";
            continue;
        }
        TextAfterRe=TextAfterRe+"\n"+temp;
    }
    return TextAfterRe;
}
tfwithcos.cpp
#include "tfwithcos.h"
//把两篇文章生成两个数组
void RaiseTwoArray(QString * &TEXToneArray,QString * &TEXTtwoArray,int &TextOneWordNum,int &TextTwoWordNum,QString TEXTone,QString TEXTtwo)
{
QRegularExpression REoneword("[a-zA-Z]{1,20}");
QRegularExpressionMatchIterator Matchsone=REoneword.globalMatch(TEXTone);
QRegularExpressionMatchIterator Matchstwo=REoneword.globalMatch(TEXTtwo);
TextOneWordNum=0;
while(Matchsone.hasNext())
{
QRegularExpressionMatch tempmatch=Matchsone.next();//不加这句会异常退出,不知道为什么。。
TextOneWordNum++;
}
TextTwoWordNum=0;
while(Matchstwo.hasNext())
{
QRegularExpressionMatch tempmatch=Matchstwo.next();//不加这句会异常退出,不知道为什么。。
TextTwoWordNum++;
}
qDebug()<<TextOneWordNum<<" 和 "<<TextTwoWordNum;
TEXToneArray=new QString [TextOneWordNum];
TEXTtwoArray=new QString [TextTwoWordNum];
Matchsone=REoneword.globalMatch(TEXTone);
Matchstwo=REoneword.globalMatch(TEXTtwo);
int index=0;
while(Matchsone.hasNext())
{
QRegularExpressionMatch match=Matchsone.next();
TEXToneArray[index]=match.captured(0);
// qDebug()<<TEXToneArray[index];
index++;
}
index=0;
while(Matchstwo.hasNext())
{
QRegularExpressionMatch match=Matchstwo.next();
TEXTtwoArray[index]=match.captured(0);
//qDebug()<<TEXTtwoArray[index];
index++;
}
}
// Appends a new node holding `word` behind the last node of the list that
// starts at `Head`. `Head` must not be nullptr.
void AddAtTail(Node * Head,QString word)
{
    Node *tail=Head;
    for(;tail->_next!=nullptr;tail=tail->_next)
    {
        // walk to the last node
    }
    tail->_next=new Node(word);
}
//把刚刚生成的两个数组合并成一个链表,并且在合并的过程中顺带进行链表去重
Node* CombineTwoArrayToALinkList(QString * TEXToneArray,QString * TEXTtwoArray,int TextOneWordNum,int TextTwoWordNum,int &LinkListLength)
{
LinkListLength=0;
Node * LinklistHead=nullptr;
Node *p=LinklistHead;
for(int i=0;i<TextOneWordNum;i++)
{
p=LinklistHead;
if(LinklistHead==nullptr)//如果是空链表,就加上去就可以了
{
LinklistHead=new Node("i");//头节点
LinkListLength++;
AddAtTail(LinklistHead,TEXToneArray[i]);
LinkListLength++;
}
else//链表非空的情况
{
while(p!=nullptr)
{
if(p->_word==TEXToneArray[i])//已经存在了
break;
p=p->_next;
}
if(p==nullptr)//找到链表的最后都没有找到重复,就加上去
{
AddAtTail(LinklistHead,TEXToneArray[i]);
LinkListLength++;
}
}
}
//添加第二篇文章数组的内容
for(int i=0;i<TextTwoWordNum;i++)
{
p=LinklistHead;
if(LinklistHead==nullptr)//如果是空链表,就加上去就可以了
{
LinklistHead=new Node("start");//头节点
LinkListLength++;
AddAtTail(LinklistHead,TEXTtwoArray[i]);
LinkListLength++;
}
else//链表非空的情况
{
while(p !=nullptr)
{
if(p->_word==TEXTtwoArray[i])
break;
p=p->_next;
}
if(p==nullptr)//找到链表的最后都没有找到重复,就加上去
{
AddAtTail(LinklistHead,TEXTtwoArray[i]);
LinkListLength++;
}
}
}
return LinklistHead;
}
// Debug helper: prints the address of the head node and then the word and
// count of every node of the list.
void ShowLinkList(Node* Head)
{
    qDebug()<<"链表:"<<Head;
    for(Node *entry=Head;entry!=nullptr;entry=entry->_next)
    {
        qDebug()<<entry->_word<<":"<<entry->_num;
    }
}
// Counts the words of one article inside its word list: for every word of
// `TEXTArray` the first node carrying that word gets its counter increased
// by one. Words that are not in the list are ignored.
void fillLinkList(Node *LinkListOfpage,QString *TEXTArray,int TextWordNum)
{
    for(int w=0;w<TextWordNum;++w)
    {
        const QString &token=TEXTArray[w];

        Node *cursor=LinkListOfpage;
        while(cursor!=nullptr && !(cursor->_word==token))
        {
            cursor=cursor->_next;
        }

        if(cursor!=nullptr)
        {
            cursor->_num++;
        }
    }
}
// Builds the term-frequency report of one article: one line
// "word:count/Length" for every node whose count is not zero. A zero count
// means the word only occurs in the other article, so it is left out.
QString showtf(Node *Head,int Length)
{
    QString report="";
    for(const Node *entry=Head;entry!=nullptr;entry=entry->_next)
    {
        if(entry->_num==0)
        {
            continue;
        }
        report=report+entry->_word+":"+QString::number(entry->_num)+"/"+QString::number(Length)+'\n';
    }
    return report;
}
// Cosine similarity of two vectors.
//
// N         number of components in each array
// oneArray  first vector
// twoArray  second vector
//
// Returns dot(one, two) / (|one| * |two|). When the denominator is not a
// positive number (N is 0 or one of the vectors has zero length) the
// similarity is undefined; 0 is returned instead of the NaN/inf the plain
// division would produce.
double cal(int N,double *oneArray,double *twoArray)
{
    double fz=0;      // numerator: dot product
    double onefm=0;   // squared norm of the first vector
    double twofm=0;   // squared norm of the second vector
    for(int i=0;i<N;i++)
    {
        fz=fz+oneArray[i]*twoArray[i];
        onefm=onefm+oneArray[i]*oneArray[i];
        twofm=twofm+twoArray[i]*twoArray[i];
    }
    qDebug()<<"fz:"<<fz;
    qDebug()<<"onefm"<<onefm<<" "<<"twofm"<<twofm;

    const double fm=sqrt(onefm)*sqrt(twofm);   // denominator
    qDebug()<<"fm"<<fm;

    if(!(fm>0.0))
    {
        return 0.0;
    }
    return fz/fm;
}
mainwindow.ui
<?xml version="1.0" encoding="UTF-8"?>
<!--
  Qt Designer form for the MainWindow class.
  Widget names the C++ code refers to:
    lnt_url, lnt_url_2                     URL inputs for the two pages
    btn_Start                              starts a run
    progressBar                            progress of the crawl/extract step
    plaintxt_rawHtml, plaintxt_rawHtml_2   raw HTML of each page
    plainTxt_result, plainTxt_result_2     extracted article text
    plainTxt_tf1, plainTxt_tf2             term-frequency report per article
    lineEdit_coskey                        cosine similarity result
-->
<ui version="4.0">
<class>MainWindow</class>
<widget class="QMainWindow" name="MainWindow">
<property name="geometry">
<rect>
<x>0</x>
<y>0</y>
<width>1169</width>
<height>700</height>
</rect>
</property>
<property name="font">
<font>
<family>Arial</family>
<weight>75</weight>
<italic>true</italic>
<bold>true</bold>
</font>
</property>
<property name="windowTitle">
<string>爬取并计算Web网页的内容相似度 Alsn 信息内容安全课设</string>
</property>
<property name="windowIcon">
<iconset resource="tupian.qrc">
<normaloff>:/icon.png</normaloff>:/icon.png</iconset>
</property>
<property name="autoFillBackground">
<bool>false</bool>
</property>
<property name="styleSheet">
<string notr="true">#MainWindow {background-color: rgb(140,199,181);}</string>
</property>
<widget class="QWidget" name="centralWidget">
<widget class="QLabel" name="label">
<property name="geometry">
<rect>
<x>30</x>
<y>10</y>
<width>91</width>
<height>42</height>
</rect>
</property>
<property name="font">
<font>
<family>Arial Black</family>
<pointsize>14</pointsize>
<weight>75</weight>
<bold>true</bold>
</font>
</property>
<property name="styleSheet">
<string notr="true">color: rgb(0, 0, 0);</string>
</property>
<property name="text">
<string>URL1</string>
</property>
</widget>
<widget class="QLineEdit" name="lnt_url">
<property name="geometry">
<rect>
<x>120</x>
<y>20</y>
<width>1011</width>
<height>31</height>
</rect>
</property>
<property name="font">
<font>
<family>Arial</family>
<pointsize>11</pointsize>
<weight>50</weight>
<bold>false</bold>
</font>
</property>
<property name="styleSheet">
<string notr="true">background-color: rgb(255, 245, 183);</string>
</property>
<property name="placeholderText">
<string>在此输入文章链接</string>
</property>
</widget>
<widget class="QPlainTextEdit" name="plainTxt_result">
<property name="geometry">
<rect>
<x>420</x>
<y>220</y>
<width>491</width>
<height>211</height>
</rect>
</property>
<property name="font">
<font>
<family>Arial</family>
<pointsize>14</pointsize>
<weight>50</weight>
<italic>false</italic>
<bold>false</bold>
</font>
</property>
<property name="styleSheet">
<string notr="true">background-color: rgb(214,213,183);</string>
</property>
</widget>
<widget class="QPlainTextEdit" name="plaintxt_rawHtml">
<property name="enabled">
<bool>true</bool>
</property>
<property name="geometry">
<rect>
<x>20</x>
<y>220</y>
<width>371</width>
<height>211</height>
</rect>
</property>
<property name="font">
<font>
<family>Arial</family>
<pointsize>12</pointsize>
<weight>50</weight>
<italic>false</italic>
<bold>false</bold>
</font>
</property>
<property name="mouseTracking">
<bool>false</bool>
</property>
<property name="autoFillBackground">
<bool>false</bool>
</property>
<property name="styleSheet">
<string notr="true">background-color: rgb(214,213,183);</string>
</property>
</widget>
<widget class="QPushButton" name="btn_Start">
<property name="geometry">
<rect>
<x>20</x>
<y>110</y>
<width>93</width>
<height>51</height>
</rect>
</property>
<property name="font">
<font>
<family>Arial</family>
<pointsize>14</pointsize>
<weight>75</weight>
<bold>true</bold>
</font>
</property>
<property name="styleSheet">
<string notr="true">background-color: rgb(255, 216, 58);</string>
</property>
<property name="text">
<string>Start</string>
</property>
</widget>
<widget class="QLabel" name="label_2">
<property name="geometry">
<rect>
<x>160</x>
<y>190</y>
<width>121</width>
<height>21</height>
</rect>
</property>
<property name="font">
<font>
<family>Arial</family>
<pointsize>12</pointsize>
<weight>75</weight>
<bold>true</bold>
</font>
</property>
<property name="text">
<string>文章一HTML</string>
</property>
</widget>
<widget class="QLabel" name="label_3">
<property name="geometry">
<rect>
<x>630</x>
<y>190</y>
<width>61</width>
<height>21</height>
</rect>
</property>
<property name="font">
<font>
<family>Arial</family>
<pointsize>12</pointsize>
<weight>75</weight>
<bold>true</bold>
</font>
</property>
<property name="text">
<string>文章一</string>
</property>
</widget>
<widget class="QProgressBar" name="progressBar">
<property name="geometry">
<rect>
<x>120</x>
<y>120</y>
<width>641</width>
<height>41</height>
</rect>
</property>
<property name="styleSheet">
<string notr="true"/>
</property>
<property name="value">
<number>0</number>
</property>
</widget>
<widget class="QLabel" name="label_4">
<property name="geometry">
<rect>
<x>30</x>
<y>60</y>
<width>91</width>
<height>42</height>
</rect>
</property>
<property name="font">
<font>
<family>Arial Black</family>
<pointsize>14</pointsize>
<weight>75</weight>
<bold>true</bold>
</font>
</property>
<property name="styleSheet">
<string notr="true">color: rgb(0, 0, 0);</string>
</property>
<property name="text">
<string>URL2</string>
</property>
</widget>
<widget class="QLineEdit" name="lnt_url_2">
<property name="geometry">
<rect>
<x>120</x>
<y>60</y>
<width>1011</width>
<height>31</height>
</rect>
</property>
<property name="font">
<font>
<family>Arial</family>
<pointsize>11</pointsize>
<weight>50</weight>
<bold>false</bold>
</font>
</property>
<property name="styleSheet">
<string notr="true">background-color: rgb(255, 245, 183);</string>
</property>
<property name="placeholderText">
<string>在此输入文章链接</string>
</property>
</widget>
<widget class="QLabel" name="label_5">
<property name="geometry">
<rect>
<x>150</x>
<y>440</y>
<width>111</width>
<height>21</height>
</rect>
</property>
<property name="font">
<font>
<family>Arial</family>
<pointsize>12</pointsize>
<weight>75</weight>
<bold>true</bold>
</font>
</property>
<property name="text">
<string>文章二HTML</string>
</property>
</widget>
<widget class="QPlainTextEdit" name="plaintxt_rawHtml_2">
<property name="enabled">
<bool>true</bool>
</property>
<property name="geometry">
<rect>
<x>20</x>
<y>470</y>
<width>371</width>
<height>211</height>
</rect>
</property>
<property name="font">
<font>
<family>Arial</family>
<pointsize>12</pointsize>
<weight>50</weight>
<italic>false</italic>
<bold>false</bold>
</font>
</property>
<property name="mouseTracking">
<bool>false</bool>
</property>
<property name="autoFillBackground">
<bool>false</bool>
</property>
<property name="styleSheet">
<string notr="true">background-color: rgb(214,213,183);</string>
</property>
</widget>
<widget class="QLabel" name="label_6">
<property name="geometry">
<rect>
<x>630</x>
<y>440</y>
<width>61</width>
<height>21</height>
</rect>
</property>
<property name="font">
<font>
<family>Arial</family>
<pointsize>12</pointsize>
<weight>75</weight>
<bold>true</bold>
</font>
</property>
<property name="text">
<string>文章二</string>
</property>
</widget>
<widget class="QPlainTextEdit" name="plainTxt_result_2">
<property name="geometry">
<rect>
<x>420</x>
<y>470</y>
<width>491</width>
<height>211</height>
</rect>
</property>
<property name="font">
<font>
<family>Arial</family>
<pointsize>14</pointsize>
<weight>50</weight>
<italic>false</italic>
<bold>false</bold>
</font>
</property>
<property name="styleSheet">
<string notr="true">background-color: rgb(214,213,183);</string>
</property>
</widget>
<widget class="QPlainTextEdit" name="plainTxt_tf1">
<property name="geometry">
<rect>
<x>940</x>
<y>220</y>
<width>211</width>
<height>211</height>
</rect>
</property>
<property name="font">
<font>
<family>Arial</family>
<pointsize>12</pointsize>
<weight>75</weight>
<italic>false</italic>
<bold>true</bold>
</font>
</property>
<property name="styleSheet">
<string notr="true">background-color: rgb(255, 245, 183);</string>
</property>
</widget>
<widget class="QPlainTextEdit" name="plainTxt_tf2">
<property name="geometry">
<rect>
<x>940</x>
<y>470</y>
<width>211</width>
<height>211</height>
</rect>
</property>
<property name="font">
<font>
<family>Arial</family>
<pointsize>12</pointsize>
<weight>75</weight>
<italic>false</italic>
<bold>true</bold>
</font>
</property>
<property name="styleSheet">
<string notr="true">background-color: rgb(255, 245, 183);</string>
</property>
</widget>
<widget class="QLabel" name="label_7">
<property name="geometry">
<rect>
<x>1000</x>
<y>190</y>
<width>91</width>
<height>21</height>
</rect>
</property>
<property name="font">
<font>
<family>Arial</family>
<pointsize>12</pointsize>
<weight>75</weight>
<bold>true</bold>
</font>
</property>
<property name="text">
<string>文章一TF</string>
</property>
</widget>
<widget class="QLabel" name="label_8">
<property name="geometry">
<rect>
<x>1000</x>
<y>440</y>
<width>91</width>
<height>21</height>
</rect>
</property>
<property name="font">
<font>
<family>Arial</family>
<pointsize>12</pointsize>
<weight>75</weight>
<bold>true</bold>
</font>
</property>
<property name="text">
<string>文章二TF</string>
</property>
</widget>
<widget class="QLabel" name="label_9">
<property name="geometry">
<rect>
<x>780</x>
<y>120</y>
<width>141</width>
<height>41</height>
</rect>
</property>
<property name="font">
<font>
<family>Arial</family>
<pointsize>16</pointsize>
<weight>75</weight>
<bold>true</bold>
</font>
</property>
<property name="text">
<string>余弦相似度:</string>
</property>
</widget>
<widget class="QLineEdit" name="lineEdit_coskey">
<property name="geometry">
<rect>
<x>930</x>
<y>120</y>
<width>201</width>
<height>41</height>
</rect>
</property>
<property name="font">
<font>
<family>Arial</family>
<pointsize>16</pointsize>
<weight>75</weight>
<bold>true</bold>
</font>
</property>
</widget>
</widget>
<widget class="QStatusBar" name="statusBar"/>
</widget>
<layoutdefault spacing="6" margin="11"/>
<resources>
<include location="tupian.qrc"/>
</resources>
<connections/>
</ui>