#include<iostream>
#include<algorithm>
#include<cstdio>
#include<stdlib.h>
#include<pthread.h>
#include<map>
#include<string>
#include<vector>
using namespace std;
void *count(void *filename); // Thread entry point: counts the words of one file (filename is passed as a char* path)
void allcount_printf(); // Prints the collected results
map<string,int> word_count; // Maps each word to the number of times it has been seen
map<string,int>::iterator iter; // Iterator used to walk word_count
// Ordering predicate for (word, count) pairs: puts larger counts first.
// Returns true only when x's count is strictly greater than y's, so two
// entries with equal counts compare as equivalent.
bool cmp(const pair<string,int> &x,const pair<string,int> &y)
{
    const int lhs_count = x.second;
    const int rhs_count = y.second;
    return rhs_count < lhs_count;
}
// Mutex the counting threads lock around their updates to word_count;
// main() initialises it before starting the threads and destroys it after joining them.
pthread_mutex_t mutex;
// Program entry point.
//
// Starts one counting thread per file named on the command line
// (argv[1] .. argv[argc-1]), waits for all of them to finish, then prints
// the number of distinct words, the frequency report and the elapsed time.
// Returns 0.
int main(int argc,char *argv[]){
    // NOTE: clock() reports processor time used by the whole process, so
    // the figure printed at the end is CPU time, not wall-clock time.
    const clock_t start=clock();

    // Handles of the threads that were actually started. The container is
    // sized from argc so any number of input files is supported.
    vector<pthread_t> tid;
    if(argc>1)
        tid.reserve(argc-1);

    // Initialise the mutex before any worker can touch it.
    pthread_mutex_init(&mutex,NULL);

    // Create one worker thread per input file.
    for(int i=1;i<argc;i++){
        pthread_t handle;
        const int rc=pthread_create(&handle,NULL,count,static_cast<void *>(argv[i]));
        if(rc!=0){
            // Do not record the handle: joining a thread that was never
            // created is undefined behaviour.
            fprintf(stderr,"failed to create thread for %s (error %d)\n",argv[i],rc);
            continue;
        }
        tid.push_back(handle);
    }

    // Make the main thread wait for every worker that was started.
    for(vector<pthread_t>::size_type i=0;i<tid.size();i++)
        pthread_join(tid[i],NULL);

    // All workers are done; the mutex is no longer needed.
    pthread_mutex_destroy(&mutex);

    // Print how many distinct words were found.
    const int cnt=static_cast<int>(word_count.size());
    printf("total number of words:%d\n",cnt);

    // Print every word ordered by descending frequency.
    allcount_printf();

    const clock_t finish=clock();
    const double totaltime=static_cast<double>(finish-start)/CLOCKS_PER_SEC;
    cout<<"\n多线程程序的运行时间为"<<totaltime<<"秒!"<<endl;
    return 0;
}
// Builds the map key for one raw whitespace-delimited token: punctuation is
// dropped and the remaining characters are lower-cased, so that e.g.
// "Word," and "word" are counted as the same word.
static string normalize_token(const char *raw)
{
    string word;
    for(const char *p=raw;*p!='\0';++p){
        // The <cctype> functions require a value representable as
        // unsigned char; passing a negative plain char is undefined.
        const unsigned char ch=static_cast<unsigned char>(*p);
        if(!ispunct(ch))
            word+=static_cast<char>(tolower(ch));
    }
    return word;
}

// Thread entry point: reads the text file whose path is passed in
// `filename` (a char* cast to void*) and adds every word it contains to the
// shared word_count map.
//
// Prints "error" and does nothing else if the file cannot be opened.
// Always returns NULL; the result is not used by the joiner.
void *count(void *filename)
{
    const char *fname=static_cast<const char *>(filename);

    // Open the file for reading.
    FILE *fp=fopen(fname,"r");
    if(fp==NULL){
        cout<<"error"<<endl;
        return NULL;
    }

    char tmp[100];
    // The field width leaves room for the terminating NUL, so an over-long
    // token is read in pieces instead of overrunning tmp.
    while(fscanf(fp,"%99s",tmp)==1){
        const string str=normalize_token(tmp);
        // A token made only of punctuation leaves nothing to count.
        if(str.empty())
            continue;

        // Look-up and update happen under one lock: reading the map while
        // another thread inserts is a data race, and a separate
        // "find, then insert" can drop a count when two threads meet the
        // same new word. operator[] value-initialises a missing entry to 0.
        pthread_mutex_lock(&mutex);
        ++word_count[str];
        pthread_mutex_unlock(&mutex);
    }

    fclose(fp);
    return NULL;
}
// Prints the word statistics gathered in word_count: first the most
// frequent words (at most ten), then every word, both in descending order
// of occurrence count.
void allcount_printf()
{
    // A map is ordered by key, so copy its entries into a vector that can
    // be sorted by value instead.
    vector< pair<string,int> > word(word_count.begin(),word_count.end());
    sort(word.begin(),word.end(),cmp);

    typedef vector< pair<string,int> >::size_type index_t;

    // Print the TOP10 hot words, or fewer when the input has fewer than ten
    // distinct words (indexing past the end would be undefined behaviour).
    printf("TOP10热词为:\n");
    const index_t top=word.size()<10?word.size():10;
    for(index_t i=0;i<top;i++)
        cout<<word[i].first<<":"<<word[i].second<<endl;

    // Print every word in descending order of frequency.
    printf("各单词出现频率为:\n");
    for(index_t i=0;i<word.size();i++)
        cout<<word[i].first<<":"<<word[i].second<<endl;
}
// Run results: