本篇文章是描述使用动态规划算法实现文献查重(C/C++实现)的程序。
#include <stdio.h>
#include <stdlib.h>
#include <iostream>
#include <iomanip>
#include <string>
#include <string.h>
#include <sstream>
#include <cstdlib>
#include <fstream>
#include <cassert>
#include <algorithm>
#include <math.h>
#include <iterator>
#include <cctype>
using namespace std;
typedef struct Sentence{
string sentence; //记录句子
int num; //记录句子单词个数
}Sentence;
typedef struct sameSentence{
string libsent; //记录对到最长公共子序列的lib的句子
string textsent; //记录test中对比的句子
int textnum; //记录test中对比的句子的单词的个数
int samenum; //最长公共子序列的单词个数
}sameSentence;
sameSentence saveall[1000]; //存储所有对比好的信息
Sentence libSentence[1000]; //按句存储lib文件的内容
int wordSimilarity[1000][1000];
float Score[1000][1000]; //记录最长公共子序列的得分
string str1[1000],str2[1000]; //存储每个句子分割出来的单词
int libline = 0; //lib文件的句子的个数
int testline = 0; //test文件的句子的个数
void libtxtToSentence( string file); // 把lib里的内容按句子存储
void textToSentence( string file ); // 读取text文本的内容并使用动态规划进行查重
float max( float a, float b );
float maxthree( float a, float b, float c );
int main()
{
int j;
string libfile = "D://lib.txt";
string textfile = "D://text.txt";
libtxtToSentence( libfile );
textToSentence( textfile );
for( j=0; j<testline; j++ )
{
cout<<"第"<<j+1<<"句对比"<<endl;
cout<<saveall[j].textsent<<endl;
cout<<saveall[j].libsent<<endl;
cout<<saveall[j].samenum<<"\t"<<saveall[j].textnum<<endl;
cout<<"\n"<<endl;
}
return 0;
}
void libtxtToSentence( string file ) // 把lib里的内容按句子存储
{
ifstream infile;
infile.open( file.data() ); //将文件流对象与文件连接起来
assert( infile.is_open() ); //若失败,则输出错误消息,并终止程序运行
char currentChar;
string tempSentence="";
Sentence temp;
int num1 = 0;
infile >> noskipws;
while ( !infile.eof() )
{
infile>>currentChar;
if((( currentChar >='a' )&&( currentChar <='z' ))||(( currentChar >='A' )&&( currentChar <='Z' )))
{
tempSentence += currentChar;
}else if(( currentChar =='.' )||( currentChar =='?' )||( currentChar =='!' ))
{
transform(tempSentence.begin(), tempSentence.end(), tempSentence.begin(), ::tolower);
temp.num = num1+1;
temp.sentence = tempSentence;
libSentence[libline] = temp; //按每句子存储lib的内容到libSentence[]中
num1 = 0;
tempSentence = "";
libline++; //lib文件的句子的个数
}else //匹配到空格
{
if( tempSentence=="" )
{
continue;
}
else if(tempSentence[tempSentence.size()==1?1:tempSentence.size()-1] != ' ') //匹配到空格,当前句末不为空格,添加空格
{
tempSentence += " ";
num1 += 1; //单词个数加1
}
}
}
infile.close(); //关闭文件输入流
}
void textToSentence( string file ) // 读取text文本的内容并使用动态规划进行查重
{
ifstream infile;
infile.open( file.data() ); //将文件流对象与文件连接起来
assert( infile.is_open() ); //若失败,则输出错误消息,并终止程序运行
char currentChar;
string tempSentence="";
Sentence temp;
int num1 = 0;
infile >> noskipws;
while ( !infile.eof() )
{
infile>>currentChar;
if((( currentChar >='a' )&&( currentChar <='z' ))||(( currentChar >='A' )&&( currentChar <='Z' )))
{
tempSentence += currentChar;
}else if(( currentChar =='.' )||( currentChar =='?' )||( currentChar =='!' )) //完成一个句子的读取,进行动态规划匹配查重
{
transform(tempSentence.begin(), tempSentence.end(), tempSentence.begin(), ::tolower);
temp.num = num1+1; // 把当前读取到的test句子的单词个数存下
temp.sentence = tempSentence; // 把当前读取到的test句子存下
num1 = 0;
tempSentence = "";
for(int i=0;i<libline-1;i++ ) //分别与lib中的每一句进行对比
{
string t;
int n = 0;
int k,j;
for(istringstream is(temp.sentence); is>>t;) { //存储把test的句子分割出来的单词
str1[n++] = t;
}
n = 0;
for(istringstream is1(libSentence[i].sentence); is1>>t;) { //存储把lib的句子分割出来的单词
str2[n++] = t;
}
for( k=0;k<temp.num;k++) //test当前句子的个数
{
for( j=0;j<libSentence[i].num;j++) //与lib文件的句子的单词分别进行对比
{
if(str1[k]==str2[j])
wordSimilarity[k][j] = 1;
else
wordSimilarity[k][j] = 0;
}
}
for(k=0; k<temp.num; k++)
{
Score[k][0] = max(Score[k-1][0]-0.5, wordSimilarity[k][0]-0.5*(k-1) );
}
for(k=0; k<libSentence[i].num; k++)
{
Score[0][k] = max(Score[0][k-1]-0.5, wordSimilarity[0][k]-0.5*(k-1) );
}
for(k=1;k<temp.num;k++)
{
for(j=1;j<libSentence[i].num;j++)
{
Score[k][j] = maxthree( Score[k-1][j]-0.5, Score[k][j-1]-0.5, Score[k-1][j-1]+wordSimilarity[k][j] );
}
}
int sum = Score[temp.num-1][libSentence[i].num-1];
if(saveall[testline].samenum < sum)
{
saveall[testline].libsent = libSentence[i].sentence;
saveall[testline].samenum = sum;
saveall[testline].textnum = temp.num;
saveall[testline].textsent = temp.sentence;
}
}
testline++; //test文件的句子的个数
}else //匹配到空格
{
if( tempSentence=="" )
{
continue;
}
else if(tempSentence[tempSentence.size()==1?1:tempSentence.size()-1] != ' ') //匹配到空格,当前句末不为空格,添加空格
{
tempSentence += " ";
num1 += 1;
}
}
}
infile.close(); //关闭文件输入流
}
float max( float a, float b )
{
if( a > b )
return a;
else
return b;
}
float maxthree( float a, float b, float c )
{
float temp = a;
if( temp < b )
temp = b;
if( temp < c )
temp = c;
return temp;
}
加企鹅2844365449一起学习技术,讨论问题。
文件文本示例:lib.txx
A recursive algorithm is one that invokes makes reference to itself repeatedly until a certain condition also known as termination co......
text.txt
In mathematics and computer science, an algorithm is a self contained step by step set of operations to be performed. Algorithms