两个字符串相似度百分比匹配(支持中文字符串匹配)C++

#pragma once
#ifndef SIMILARITY_H
#define SIMILARITY_H

#include <cstddef>
#include <string>
#include <vector>

/// Splits a byte string into a sequence of "characters" used as the unit of
/// comparison by the matching routines.
///
/// A byte with its high bit (0x80) set is treated as the first byte of a
/// two-byte character and is emitted together with the byte that follows it;
/// every other byte is emitted on its own.
///
/// NOTE(review): the fixed width of two bytes fits double-byte encodings
/// (presumably GBK/GB2312); UTF-8 text, where CJK characters take three
/// bytes, would be split at the wrong boundaries -- confirm the encoding
/// callers actually use.
///
/// @param SA  Input bytes; not modified.
/// @return    One string per detected character, in input order. If the input
///            ends with a lone high-bit byte, that byte is returned as a
///            one-byte element.
inline std::vector<std::string> getStringArray(const std::string& SA)
{
    std::vector<std::string> tmp;
    for (size_t i = 0; i < SA.size();)
    {
        // High bit set => lead byte of a two-byte character.
        const size_t width = (SA[i] & 0x80) ? 2 : 1;
        // substr clamps the length, so a truncated trailing character is safe.
        tmp.push_back(SA.substr(i, width));
        i += width;
    }
    return tmp;
}

/// Builds the KMP "next" (prefix-function) table for a pattern.
///
/// Entry j holds the length of the longest proper prefix of sSA[0..j] that is
/// also a suffix of sSA[0..j]. Entry 0 is always 0.
///
/// @param sSA  Pattern, already split into comparison units; not modified.
/// @return     A table with one entry per pattern element; empty when the
///             pattern is empty.
inline std::vector<size_t> getNextArray(const std::vector<std::string>& sSA)
{
    std::vector<size_t> gNA(sSA.size(), 0);
    size_t matched = 0;  // length of the border carried over from position j - 1
    for (size_t j = 1; j < sSA.size(); ++j)
    {
        // On mismatch fall back to the next shorter border until one can be
        // extended by sSA[j] or no border is left.
        while (matched > 0 && sSA[matched] != sSA[j])
        {
            matched = gNA[matched - 1];
        }
        if (sSA[matched] == sSA[j])
        {
            ++matched;
        }
        gNA[j] = matched;
    }
    return gNA;
}

int KMP(std::vectorstd::string& mSA, std::string& sString)
{
std::vectorstd::string sSA = getStringArray(sString);
std::vector<size_t> sNA = getNextArray(sSA);
size_t j = 0;
for (size_t i = 0; i < mSA.size(); i++)
{
while (j>0&&mSA[i] != sSA[j])
{
j = sNA[j - 1];
}
if (mSA[i] == sSA[j])
{
j++;
}
if (j == sSA.size())
{
return i + 1 - sSA.size();
}
}
return -1;
}

int SIMILARITY(std::string mString, std::string sString)
{
std::vectorstd::string mSA = getStringArray(mString);
std::vectorstd::string sSA = getStringArray(sString);
size_t maxSubstring = 0;
for (size_t i = 0; i < sSA.size(); i++)
{
std::string tmp= sSA[i];
size_t j = i;
size_t count = 0;
while (KMP(mSA, tmp) != -1)
{
count++;
if (j + 1 == sSA.size())
break;
tmp += sSA[++j];
}
if (maxSubstring < count)
maxSubstring = count;
}
return ((double)maxSubstring / (double)mSA.size())

  • 0
    点赞
  • 3
    收藏
    觉得还不错? 一键收藏
  • 2
    评论
评论 2
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值