建立词袋模型




int ConstructMap(map<string,vector<pair<int,int>>>&mymap,int beginindex,int endindex)
{
    
//    vector<string> mySplit(string s);
     set<string>MakeStopSet();
    vector<string>goodWordsinPieceArticle(string rawtext,set<string>stopwords);
    CoInitialize(NULL);
    _ConnectionPtr pConn(__uuidof(Connection));
    _RecordsetPtr pRst(__uuidof(Recordset));
    char * select =new char[5000];
    memset(select,0,5000);
    char *firstpart="select CKeyWord,ArticleId,CAbstract from Article where ArticleId between ";
    char *lastpart=" order by ArticleId";
    char middlepart1[100];
    char middlepart2[100];
    sprintf_s(middlepart1,sizeof(middlepart1),"%d",beginindex);
    sprintf_s(middlepart2,sizeof(middlepart2),"%d",endindex);
    strcat(select,firstpart);
    strcat(select,middlepart1);
    strcat(select," and ");
    strcat(select,middlepart2);
    strcat(select,lastpart);
    pConn->ConnectionString="Provider=SQLOLEDB.1;Password=xxxxxx;Persist Security Info=True; User ID=sa;Initial Catalog=ArticleCollection";
    pConn->Open("","","",adConnectUnspecified);
    pRst=pConn->Execute(select,NULL,adCmdText);
    set<string>stopwords=MakeStopSet();
    while(!pRst->rsEOF)
    {    vector<string>wordcollection;
        //string keywordstr=(_bstr_t)pRst->GetCollect("CKeyWord");
        string rawtext=(_bstr_t)pRst->GetCollect("CAbstract");
        if(rawtext!="")
        {
                wordcollection=goodWordsinPieceArticle(rawtext,stopwords);
                string tempid=(_bstr_t)pRst->GetCollect("ArticleId");
                int articleid=atoi(tempid.c_str());
                for(vector<string>::iterator strit=wordcollection.begin();strit!=wordcollection.end();strit++)
                {
                    vector<pair<int,int>>::iterator it;
                    if(mymap[*strit].empty())
                    {
                        pair<int,int>mytemppair=make_pair(articleid,1);
                        mymap[*strit].push_back(mytemppair);

                    }
                    else
                    {
                        for(it=mymap[*strit].begin();it!=mymap[*strit].end();it++)
                        {  
                            if(it->first==articleid)
                            {
                                it->second=++(it->second);
                                break;
                            }
                    
                        }
                        if(it==mymap[*strit].end())
                        {
                            pair<int,int>mytemppair=make_pair(articleid,1);
                            mymap[*strit].push_back(mytemppair);
                        }

                    }

            }
            

        }
        
        
        pRst->MoveNext();
        wordcollection.clear();
    }
    pRst->Close();
    pConn->Close();
    pRst.Release();
    pConn.Release();
    CoUninitialize();
    delete[] select;
    return 0;

}

  • 0
    点赞
  • 0
    收藏
    觉得还不错? 一键收藏
  • 0
    评论
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值