马尔可夫链文本生成c语言,利用马尔可夫链生成随机文本

最新推荐文章于 2021-09-11 20:43:27 发布

飞碟数码

最新推荐文章于 2021-09-11 20:43:27 发布

阅读量265

点赞数

文章标签：马尔可夫链文本生成c语言

the people 空 3) hash表与后缀数组相结合，使用后缀数组构造hash表

首先解决一个问题

当有多个suffix时，如何按照概率选择一个，比如the people by for 空，

int nmatch=0;

for everyone in suffix

if( rand()%++nmatch==0 )

select=this_suffix;

对每一个后缀都执行上述的判断，可知第一个suffix一定被选中，第二个suffix以1/2的概率替换它，第三个以1/3的概率替换，依此类推；遍历完n个suffix后，每个suffix最终被选中的概率都是1/n。

#include <stdio.h>

#include <stdlib.h>

#include <string.h>

/* Number of hash buckets: size of bin[] and the modulus applied by hash(). */
#define NHASH 49979

/* Multiplier hash() applies to the running value before adding each character. */
#define MULT 31

/* Capacity of the word[] and next[] arrays. */
#define MAXWORDS 80000

char inputchars[4300000];// input text; main() stores the words here back to back, each NUL-terminated

char *word[MAXWORDS];// suffix array: word[i] points at the start of the i-th word inside inputchars

int nword=0;// number of words read so far

int k=2;// order of the chain: hash() and wordncmp() look at k consecutive words

int next[MAXWORDS];// hash-chain links: next[i] is the next word index in the same bucket, -1 at the end

int bin[NHASH];// bucket heads: bin[h] is the first word index whose k-word phrase hashes to h, -1 if none

/*
 * Hash the phrase made of the k consecutive NUL-terminated words that
 * start at str.  Every character, including each word's terminating
 * '\0', is folded into the value.  The result is in [0, NHASH).
 */
unsigned int hash(char* str){
    unsigned int acc = 0;
    int remaining = k;          /* words still to consume */
    const char *cur = str;

    while (remaining > 0) {
        char c = *cur++;

        acc = MULT * acc + c;
        if (c == '\0') {
            /* a terminator marks the end of one word */
            remaining--;
        }
    }
    return acc % NHASH;
}

/*
 * Compare the first k NUL-terminated words starting at p with those
 * starting at q.  Returns 0 when all k words (terminators included) are
 * identical; otherwise returns the difference of the first pair of
 * characters that differ.
 */
int wordncmp(char* p,char *q){
    int words_left = k;

    while (*p == *q) {
        if (*p == '\0') {
            /* both sides finished the same word */
            words_left--;
            if (words_left == 0) {
                return 0;
            }
        }
        p++;
        q++;
    }
    return *p - *q;
}

/*
 * Advance p past n NUL terminators and return the position just after
 * the n-th one.  When p points at the start of a word this is the start
 * of the word n places further on.  For n <= 0, p is returned unchanged.
 */
char* skip(char* p,int n){
    while (n > 0) {
        if (*p++ == '\0') {
            n--;
        }
    }
    return p;
}

int main(){

int i,j;

//步骤1：构建后缀数组

word[0]=inputchars;

//scanf以空格作为分隔符, 并且自动加上'\0'

while((scanf("%s",word[nword]))!=EOF){

word[nword+1]=word[nword]+strlen(word[nword])+1;

++nword;

}

//附加k个空字符,保证wordncmp()正确(感觉不需要这个)

for(i=0;i

word[nword][i]='\0';

//步骤2：构建hash table

//初始化hash table

for(i=0;i

bin[i]=-1;

//hash表采用前插的方式。例如：word[0], word[1], word[5]拥有相同的hash值15

//则： bin[15](5)->next[5](1)->next[1](0)->next[0](-1)

for(i=0;i<=nword-k;++i){

j=hash(word[i]);

next[i]=bin[j];

bin[j]=i;

}

//步骤3：生成随机文本

int wordsleft;//生成单词数

int psofar;

char *phrase,*p;

phrase=inputchars;

for(wordsleft=10000;wordsleft>0;--wordsleft){

psofar=0;

for(j=bin[hash(phrase)];j>=0;j=next[j])

//在hash值相同的项中找出字符串值相同的后缀数组表项，根据概率选择一个

if(wordncmp(phrase,word[j])==0&&rand()%(++psofar)==0)

p=word[j];

//将phrase重新设置

phrase=skip(p,1);

//输出符合要求单词的后面第k个单词

if(strlen(skip(phrase,k-1))==0)

break;

printf("%s\n",skip(phrase,k-1));

}

return 0;

}

======================================================

======================================================

关注

0
点赞
踩
0

收藏

觉得还不错? 一键收藏
0
评论
马尔可夫链文本生成c语言,利用马尔可夫链生成随机文本

the people 空3) hash表与后缀数组相结合，使用后缀数组构造hash表首先解决一个问题当有多个suffix时，如何按照概率选择一个，比如the people by for 空，int nmatch=0;for everyone in suffixif( rand()%++nmatch==0 )select=this_suffix;对...
复制链接

扫一扫

评论

被折叠的条评论为什么被折叠?

到【灌水乐园】发言

查看更多评论

添加红包

成就一亿技术人!

hope_wisdom

发出的红包

实付元

使用余额支付

点击重新获取

扫码支付

钱包余额 0

抵扣说明：

1.余额是钱包充值的虚拟货币，按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载，可以购买VIP、付费专栏及课程。