2020-11-25 统计英文文章中的词汇总数及重复词汇的出现次数

使用C/C++统计文章词汇数目,并统计重复词汇出现的次数。

#include<math.h>
#include<string.h>

#include<algorithm>
#include<cstddef>
#include<fstream>
#include<iomanip>
#include<ios>
#include<iostream>
#include<istream>
#include<iterator>
#include<map>
#include<ostream>
#include<string>
#include<utility>
#include<vector>

using namespace std;

// Text file whose words are counted.
static const char *const kInputPath = "/home/hx/Desktop/Cpp_project/test.txt";

// True for ASCII letters only; digits, punctuation and non-ASCII bytes are
// not treated as word characters.
static bool is_ascii_letter(char c)
{
    return (c >= 'A' && c <= 'Z') || (c >= 'a' && c <= 'z');
}

// Strips leading and trailing non-letter characters from a whitespace-delimited
// token. Characters between the first and last letter are kept, so
// "end-of-frame" and "message+CRC" survive as single words while "(message,"
// becomes "message". Returns an empty string when the token has no letter.
static std::string trim_to_word(const std::string &token)
{
    std::size_t first = 0;
    while (first < token.size() && !is_ascii_letter(token[first]))
        ++first;

    std::size_t last = token.size();
    while (last > first && !is_ascii_letter(token[last - 1]))
        --last;

    return token.substr(first, last - first);
}

// Reads the file at kInputPath, splits it into whitespace-delimited tokens,
// trims surrounding punctuation from each token and prints:
//   1. "Total vocabulary number:<N>", where N is the number of non-empty words;
//   2. one line per distinct word, in order of first appearance, with the word
//      left-aligned in a 32-column field and its occurrence count right-aligned
//      in a 6-column field.
// Words are compared case-sensitively ("CRC" and "crc" are different words).
// Returns 0 on success and 1 when the input file cannot be opened.
int main()
{
    std::ifstream input(kInputPath, std::ios::in | std::ios::binary);
    if (!input)
    {
        std::cerr << "Cannot open input file: " << kInputPath << '\n';
        return 1;
    }

    // tally keeps (word, count) pairs in first-appearance order; slot_of maps
    // a word to its index in tally so each lookup is logarithmic rather than
    // a scan over every word seen so far.
    std::vector<std::pair<std::string, int> > tally;
    std::map<std::string, std::size_t> slot_of;

    int total = 0;
    std::string token;
    // operator>> skips any run of whitespace (spaces, tabs, newlines), so
    // consecutive separators never produce empty tokens.
    while (input >> token)
    {
        const std::string word = trim_to_word(token);
        if (word.empty())
            continue;   // token was pure punctuation or digits, e.g. "0."

        ++total;
        const std::map<std::string, std::size_t>::iterator found = slot_of.find(word);
        if (found == slot_of.end())
        {
            slot_of.insert(std::make_pair(word, tally.size()));
            tally.push_back(std::make_pair(word, 1));
        }
        else
        {
            ++tally[found->second].second;
        }
    }

    std::cout << "Total vocabulary number:" << total << '\n';
    for (std::size_t i = 0; i < tally.size(); ++i)
    {
        std::cout << std::setfill(' ') << std::left << std::setw(32) << tally[i].first
                  << std::setfill(' ') << std::right << std::setw(6) << tally[i].second
                  << '\n';
    }
    return 0;
}

 

 

  • 0
    点赞
  • 0
    收藏
    觉得还不错? 一键收藏
  • 打赏
    打赏
  • 0
    评论

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包

打赏作者

想成专家

你的鼓励将是我创作的最大动力

¥1 ¥2 ¥4 ¥6 ¥10 ¥20
扫码支付:¥1
获取中
扫码支付

您的余额不足,请更换扫码支付或充值

打赏作者

实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值