根据输入文本生成随机文本

最新推荐文章于 2021-12-13 20:24:08 发布

hello_duoduo

最新推荐文章于 2021-12-13 20:24:08 发布

阅读量791

点赞数

本文链接：https://blog.csdn.net/hello_duoduo/article/details/8986944

版权

根据输入文本建立马尔科夫过程，即根据前几个单词来决定下一个输出的单词。该程序来源于《编程珠玑》第15.3节，原文用C语言实现，本文改用C++实现。

该程序使用的数据结构是“后缀数组”，一种简单且高效的数据结构。

#include "stdafx.h"

#include <cstdlib>
#include <cstring>
#include <time.h>

#include <fstream>
#include <iostream>
#include <sstream>
#include <string>
using namespace std;

/*********************************
Markov-chain random text generator.

The input file is stored as a sequence of NUL-terminated words in
inputchars; word[] holds a pointer to the start of every word and,
once sorted, acts as a word-level suffix array that Bin() searches.

Typical use: Input(), then Sort(), then Bin().
*********************************/
class RandomText
{
public:
	// str is the path of the text file that Input() will read.
	// explicit: a file name should never silently convert to a generator.
	explicit RandomText(const std::string &str)
		:inputchars(),word(),nword(0),filename(str){}

	// Reads the file into inputchars/word.
	void Input();
	// qsort() callback: compares the phrases two word[] slots point at.
	static int WordCmp(const void *p,const void *q);
	// Compares the first `approximation` words of two phrases.
	static int CharCmp(const char *first, const char *second);
	// Appends the end-of-text sentinel, prints the first words, sorts word[].
	void Sort();
	// Generates and prints the random text.
	void Bin();
	// Returns a pointer past leading words of the phrase at p.
	const char *Skip(const char *p, int index);
private:
	// word[] stores pointers into this object's own inputchars buffer, so a
	// member-wise copy would alias the source object's storage. Copying is
	// therefore disabled (declared private, intentionally not defined).
	RandomText(const RandomText &);
	RandomText &operator=(const RandomText &);

	static const int approximation=1; // order k of the chain: the next word is chosen from the k preceding words
	char inputchars[5000];   // the file's words, each NUL-terminated, stored back to back
	char *word[1000];        // start address of every word in inputchars
	int nword;               // number of words stored
	std::string filename;    // path of the input file
};

/*********************************
将文件的单词保存到inputchars数组中，
word数组中将保存每个单词的起始地址
*********************************/
void RandomText::Input()
{
	fstream infile(filename.c_str());
	if(!infile)
		return;
	string line;
	word[0]=inputchars;
	while(getline(infile,line))
	{
		istringstream stream(line);
		string temp;
		while(stream>>temp)
		{
			strcpy(word[nword],temp.c_str());
			word[nword+1]=word[nword]+strlen(word[nword])+1;
			nword++;
		}
	}
	infile.close();
}
/********************************
Comparison callback for qsort() over the word[] array.
p and q each point at one word[] element (a char pointer); the two
phrases those elements refer to are compared with CharCmp().
*********************************/
int RandomText::WordCmp(const void *p, const void *q)
{
	const char *const *lhs=static_cast<const char *const *>(p);
	const char *const *rhs=static_cast<const char *const *>(q);
	return CharCmp(*lhs,*rhs);
}

/***************************
Compares two phrases word by word, looking at the first
`approximation` NUL-terminated words of each.
Returns 0 when all of those words are identical; otherwise returns
the difference between the first pair of characters that differ.
****************************/
int RandomText::CharCmp(const char *first, const char *second)
{
	int remaining=approximation;
	while(*first==*second)
	{
		if(*first=='\0')
		{
			// A terminator in both strings: one more word matched in full.
			--remaining;
			if(remaining==0)
				return 0;
		}
		++first;
		++second;
	}
	return *first-*second;
}
/************************
Prepares word[] for searching:
  1. writes `approximation` NUL bytes after the last word so that
     phrase comparisons near the end of the text terminate;
  2. prints the opening words of the text (at most `approximation`
     of them), which seed the generated output;
  3. sorts word[] by the phrase each entry points to.
Does nothing when no words have been read, since word[] then holds
nothing that can be dereferenced or sorted.
****************************/
void RandomText::Sort()
{
	if(nword<=0)
		return;

	for(int i=0;i!=approximation;i++)
	{
		word[nword][i]=0;
	}

	// Only the first nword entries refer to real words.
	const int shown=nword<approximation?nword:approximation;
	for(int i=0;i!=shown;i++)
	{
		cout<<word[i]<<" ";
	}
	qsort(word,nword,sizeof(word[0]),WordCmp);
}

/*************************
Generates the random text and prints it to cout, one word at a time
(at most 1000 words).

Expects word[] to be sorted and the end-of-text sentinel to be in
place, i.e. Sort() must have been called first.

`phrase` is the current state: the last `approximation` words of the
output so far. Each round:
  1. binary-search word[] for the first entry not less than phrase;
  2. count the run of entries equal to phrase and pick one at random,
     i.e. one occurrence of the phrase in the input text;
  3. slide the state forward by one word from that occurrence;
  4. print the last word of the new state, which is the word that
     followed the chosen occurrence in the text.
Stops when that word is the empty end-of-text sentinel.
**************************/
void RandomText::Bin()
{
	// Nothing to generate from if no words were read.
	if(nword<=0)
		return;

	srand((unsigned int)time(NULL));
	const char *phrase=inputchars;
	for(int wordleft=1000;wordleft>0;wordleft--)
	{
		// Invariant: word[small] < phrase <= word[big], with virtual
		// entries at index -1 and nword.
		int small=-1;
		int big=nword;
		while(small+1!=big)
		{
			int middle=(small+big)/2;
			if(CharCmp(word[middle],phrase)<0)
				small=middle;
			else
				big=middle;
		}

		// Count the entries equal to phrase, never looking past the
		// last real word.
		int matches=0;
		while(big+matches<nword&&CharCmp(phrase,word[big+matches])==0)
			matches++;
		if(matches==0)
			break;	// phrase does not occur in the text

		const char *p=word[big+(rand()%matches)];

		// Advance the state by exactly one word, then look at the last
		// word of the new state.
		phrase=Skip(p,1);
		const char *next=Skip(phrase,approximation-1);
		if(*next==0)
			break;	// reached the sentinel after the last word
		cout<<next<<" ";
	}
}
/*********************
p points at the start of a word inside a sequence of NUL-terminated
words. Returns the start address of the word `index` words further
on; index 0 (or less) returns p itself.
*********************/
const char *RandomText::Skip(const char *p, int index)
{
	for(;index>0;index--)
	{
		// Step over the current word, then over its terminator.
		while(*p!=0)
			p++;
		p++;
	}
	return p;
}

int _tmain(int argc, _TCHAR* argv[])
{
	RandomText rt("test.txt");
	rt.Input();
	rt.Sort();
	rt.Bin();
	return 0;
}

hello_duoduo

关注

0
点赞
踩
0

收藏

觉得还不错? 一键收藏
0
评论
根据输入文本生成随机文本

根据输入文本建立马尔科夫过程，即根据前几个单词来决定下一个输出的单词。该程序来源于《编程珠玑》第15.3节，原文用C语言实现，本文改用C++实现。该程序使用的数据结构是“后缀数组”，一种简单且高效的数据结构。#include "stdafx.h" #include <iostream> #include <string> #include <fstream> #include <sstream> #include <time.h> using namespace
复制链接

扫一扫