马尔可夫链(Markov chain)随机文本生成

不得不说,《The Practice of Programming》一书真的很不错。下面的程序根据输入文字的模式,随机生成文字;具体的算法细节请参考该书。


#include<stdlib.h>
#include<stdio.h>
#include<string.h>

// Compile-time constants shared by the Markov text generator.
enum{
NPREF = 2, // number of prefix words that identify one state
NHASH = 4093, // number of buckets in the state hash table (statetab)
MAXGEN = 10000, // maximum number of words main() asks generate() to emit
MULTIPLIER=31 // per-byte multiplier used by hash()
};

typedef struct State State;
typedef struct Suffix Suffix;
// State: one hash-table entry that pairs a prefix of NPREF words with the
// list of suffix words recorded for that prefix.
struct State{ //prefix+suffix list
char *pref[NPREF]; // prefix words (pointers are stored, strings are not copied)
Suffix *suf; // head of the suffix list
State *next; // next state chained in the same hash bucket
};
// Suffix: node of a singly linked list of suffix words.
struct Suffix{ //list of suffixes
char *word; // the suffix word (pointer is stored, string is not copied)
Suffix *next; // next node in the suffix list
};

// Hash table of states: each slot heads a chain linked through State.next.
State *statetab[NHASH]; //hash table of states
// Sentinel "word": fills the initial prefix, is appended after the input by
// main(), and stops generate() when it is drawn as a suffix.
char NONWORD[]="\n";


// hash: fold every byte of the NPREF prefix strings into a single value
// (multiply-by-MULTIPLIER-and-add) and reduce it to a bucket index in
// the range [0, NHASH).
unsigned int hash(char *s[NPREF])
{
    unsigned int acc = 0;
    int k;

    for (k = 0; k < NPREF; k++) {
        const unsigned char *cp = (const unsigned char *)s[k];

        while (*cp != '\0') {
            acc = MULTIPLIER * acc + *cp;
            cp++;
        }
    }
    return acc % NHASH;
}

// lookup: search the state table for the state whose prefix matches `prefix`.
//   prefix - array of NPREF words to look up
//   create - non-zero: insert a new, suffix-less state when none is found
// Returns the matching (or newly created) state, or NULL when the prefix is
// absent and `create` is zero.
// A created state stores the prefix pointers without copying the strings,
// so those strings must not change afterwards.
// Terminates the program with an error message if memory cannot be allocated.
State* lookup(char *prefix[NPREF],int create)
{
    int i;
    unsigned int h;
    State *sp;

    h = hash(prefix);
    for (sp = statetab[h]; sp != NULL; sp = sp->next) {
        for (i = 0; i < NPREF; i++)
            if (strcmp(prefix[i], sp->pref[i]) != 0)
                break;
        if (i == NPREF) // every prefix word matched: found it
            return sp;
    }
    // Here sp is NULL: the chain was exhausted without a match.
    if (create) {
        sp = (State *)malloc(sizeof(State));
        if (sp == NULL) {
            // Fail loudly instead of dereferencing a null pointer below.
            fprintf(stderr, "lookup: out of memory\n");
            exit(EXIT_FAILURE);
        }
        for (i = 0; i < NPREF; i++)
            sp->pref[i] = prefix[i];
        sp->suf = NULL;
        // Push the new state on the front of its bucket chain.
        sp->next = statetab[h];
        statetab[h] = sp;
    }
    return sp;
}

// addsuffix: prepend `suffix` to the suffix list of state `sp`.
// The pointer is stored as-is (no copy), so the string must not change later.
// Terminates the program with an error message if memory cannot be allocated.
void addsuffix(State *sp,char *suffix)
{
    Suffix *suf;

    suf = (Suffix *)malloc(sizeof(Suffix));
    if (suf == NULL) {
        // Fail loudly instead of dereferencing a null pointer below.
        fprintf(stderr, "addsuffix: out of memory\n");
        exit(EXIT_FAILURE);
    }
    suf->word = suffix;
    suf->next = sp->suf;
    sp->suf = suf;
}

// add: record `suffix` as a follower of the current prefix, then slide the
// prefix window forward by one word so that `suffix` becomes its last word.
void add(char *prefix[NPREF],char *suffix)
{
    int k;
    State *state = lookup(prefix, 1); // creates the state when it is missing

    addsuffix(state, suffix);

    // Shift every prefix word one slot toward the front.
    for (k = 0; k + 1 < NPREF; k++)
        prefix[k] = prefix[k + 1];
    prefix[NPREF - 1] = suffix;
}

// build: read whitespace-separated words from `f` and feed each one to add(),
// which records it in the state table and advances `prefix`.
//   prefix - current prefix window; updated in place
//   f      - input stream to read words from
// Each word is duplicated with strdup() because the table stores pointers.
// Terminates the program with an error message if memory cannot be allocated.
void build(char *prefix[NPREF],FILE *f)
{
    char buf[100], fmt[10];
    char *word;

    // Build the format "%99s" from the buffer size so fscanf cannot overflow
    // buf. sizeof yields a size_t, so cast it to int to match "%d".
    sprintf(fmt, "%%%ds", (int)(sizeof(buf) - 1));
    while (fscanf(f, fmt, buf) == 1) {
        word = strdup(buf);
        if (word == NULL) {
            fprintf(stderr, "build: out of memory\n");
            exit(EXIT_FAILURE);
        }
        add(prefix, word);
    }
}

// generate: produce output, one word per line.
//   nwords - upper bound on the number of words printed
// Starts from a prefix of NPREF NONWORDs and repeatedly picks a random suffix
// of the current prefix. Stops early when NONWORD is picked, or when the
// current prefix has no recorded state or suffix.
void generate(int nwords)
{
    State *sp;
    Suffix *suf;
    char *prefix[NPREF], *w;
    int i, nmatch;

    for (i = 0; i < NPREF; i++) // reset initial prefix
        prefix[i] = NONWORD;

    for (i = 0; i < nwords; i++) {
        // Look up without creating: generation must not add empty states.
        sp = lookup(prefix, 0);
        if (sp == NULL)
            break;

        w = NULL;
        nmatch = 0;
        // Walk the list once, replacing the choice with probability
        // 1/nmatch at the nmatch-th element.
        for (suf = sp->suf; suf != NULL; suf = suf->next)
            if (rand() % ++nmatch == 0) // prob = 1/nmatch
                w = suf->word;

        // w stays NULL only when the state has no suffixes at all.
        if (w == NULL || strcmp(w, NONWORD) == 0)
            break;
        printf("%s\n", w);

        // Advance the prefix window by one word.
        memmove(prefix, prefix + 1, (NPREF - 1) * sizeof(prefix[0]));
        prefix[NPREF - 1] = w;
    }
}

// main: build the state table from standard input, append the NONWORD
// terminator, print a blank line, then generate up to MAXGEN words.
int main(void)
{
    char *prefix[NPREF];
    int slot = 0;

    // Start with a prefix made entirely of NONWORDs.
    while (slot < NPREF)
        prefix[slot++] = NONWORD;

    build(prefix, stdin);
    add(prefix, NONWORD);
    printf("\n");
    generate(MAXGEN);
    return 0;
}



C++ 版本:代码要少一些,可是运行速度就……


#include<cstdlib>
#include<iostream>
#include<string>
#include<deque>
#include<map>
#include<vector>

using namespace std;


// Prefix: sliding window of the most recent words (at most NPREF of them).
typedef deque<string> Prefix;
// State table: maps each prefix to every suffix word recorded after it.
map<Prefix,vector<string> > statetab; //prefix--suffixs
// Sentinel "word": fills the initial prefix, is appended after the input by
// main(), and stops generate() when it is drawn as a suffix.
const string NONWORD="\n";

enum{
NPREF = 2, // number of prefix words
NHASH = 4093, // size of state hash table array (not referenced by the C++ code shown here)
MAXGEN = 10000 // maximum number of words main() asks generate() to emit
};

// add: append word `s` to the prefix window. Once the window already holds
// NPREF words, `s` is first recorded in statetab as a suffix of that prefix
// and the oldest word is dropped, so the window never exceeds NPREF words.
void add(Prefix& prefix,const string& s)
{
    if (prefix.size() != NPREF) {
        // Window still filling up: nothing to record yet.
        prefix.push_back(s);
        return;
    }

    vector<string>& followers = statetab[prefix];
    followers.push_back(s);
    prefix.pop_front();
    prefix.push_back(s);
}

// build: read whitespace-separated words from `in` until extraction fails,
// passing each one to add() together with the running prefix window.
void build(Prefix& prefix,istream& in)
{
    for (string word; in >> word; )
        add(prefix, word);
}



void generate(int nwords)
{
Prefix prefix;
int i;
for(i=0;i<NPREF;i++)
add(prefix,NONWORD);
for(i=0;i<nwords;i++)
{
vector<string>& suf=statetab[prefix];
const string& w=suf[rand()%suf.size()];
if(w==NONWORD)
break;
cout<<w<<"\n";
prefix.pop_front();
prefix.push_back(w);
}
}


// main: seed the prefix with NPREF NONWORDs, build the state table from
// standard input, append the NONWORD terminator, then generate up to
// MAXGEN words.
int main()
{
    Prefix prefix;
    int remaining = NPREF;

    while (remaining-- > 0)
        add(prefix, NONWORD);

    build(prefix, cin);
    add(prefix, NONWORD);
    generate(MAXGEN);
}
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值