【导读】
最近遇到一个问题,可以抽象为:求两个字符串中最大公共字符数并输出所有公共字符。
【正文】
给定字符串str1和str2,借鉴“两个字符串的最长公共子序列(LCS)”问题的解法,给出递推公式:
f(i, j) : str1中[0-i] 和str2中[0-j] 的最大公共字符数
f(i, j) = max {
(1). 1 + f(i - 1, j - 1) // if (str1[i] == str2[j])
(2). f(i - 1, j) // if (str1[i] != str2[j])
(3). f(i, j - 1) // if (str1[i] != str2[j])
}
代码实现:
CommonCharSearcher.h
#include <vector>
// One matched character pair.
//   pos1 / pos2 : byte index, in the first / second buffer, of the first byte
//                 of the matched character.
//   is_en       : true when the match is a single-byte character, false when
//                 it is a two-byte character.
struct MatchInfo
{
    int pos1;
    int pos2;
    bool is_en;
};
// Computes, with a memoised recurrence, the maximum number of characters two
// byte buffers have in common and records where those characters match.
// A byte >= 0 is treated as a single-byte character; a negative byte preceded
// by another negative byte is treated as the trailing byte of a two-byte
// character.
class CommonCharSearcher
{
public:
// Result codes returned by is_match_word().
enum {NOT_MATCH, MATCH_CHAR, MATCH_TWO_CHAR};
// Classification returned by get_word_type() for the byte at a given index.
enum {UNKNOWN, CHAR, TWO_CHAR};
public:
// Sets the counters to "no result" and all pointers to NULL.
CommonCharSearcher();
// Runs the search over buff1[0..len1) and buff2[0..len2), prints the memo
// table and the matches to std::cout, and stores the result in this object.
// Returns without doing anything when a buffer is NULL or a length is <= 0.
void search_most_common_char(const char *buff1, int len1, const char* buff2, int len2);
// Largest match count found so far.
int get_max_match_num();
// Matches recorded by back_track_exec(), in the order they were appended.
const std::vector<MatchInfo>& get_match_info();
private:
// Memoised recurrence: match count for buff1[0..i] versus buff2[0..j];
// returns 0 for a NULL buffer or a negative index.
int match(const char* buff1, int i, const char* buff2, int j);
// Compares the character starting at buff1[i] with the one starting at
// buff2[j]; returns one of NOT_MATCH / MATCH_CHAR / MATCH_TWO_CHAR.
int is_match_word(const char* buff1, int i, const char* buff2, int j);
// Classifies the character that ends at buff[i] as CHAR, TWO_CHAR or UNKNOWN.
int get_word_type(const char* buff, int i);
void print(int len1, int len2); // prints the contents of the memo table
void back_track_exec(const char* buff1, int i, const char* buff2, int j, int match_num); // walks back from the maximum to collect the matches
private:
int m_match_num; // largest match count found
int m_max_pos1; // index in buff1 where that count was first reached, -1 if none
int m_max_pos2; // index in buff2 where that count was first reached, -1 if none
char *m_buff1; // set to NULL by the constructor; not otherwise used in this file
char *m_buff2; // set to NULL by the constructor; not otherwise used in this file
int **m_match_arr; // memo table indexed [i][j]; -1 marks a cell not yet computed
std::vector<MatchInfo> m_match_infos; // matches collected by back_track_exec()
};
CommonCharSearcher.cpp
#include "stdafx.h"
#include "CommonCharSearcher.h"
#include <iostream>
// Starts with an empty result: zero matches, no best position (-1, -1),
// and no buffers or memo table attached.
CommonCharSearcher::CommonCharSearcher()
    : m_match_num(0),
      m_max_pos1(-1),
      m_max_pos2(-1),
      m_buff1(NULL),
      m_buff2(NULL),
      m_match_arr(NULL)
{
}
// Searches buff1[0..len1) and buff2[0..len2) for the largest number of common
// characters, prints the memo table, the best position and every recorded
// match to std::cout, and leaves the result available through
// get_max_match_num() / get_match_info().
//
// Any result from an earlier call is discarded first. When a buffer is NULL or
// a length is not positive the function returns with an empty result.
void CommonCharSearcher::search_most_common_char(const char *buff1, int len1, const char* buff2, int len2)
{
    // Reset per-search state so repeated calls on the same object do not
    // accumulate matches or keep a stale maximum.
    m_match_num = 0;
    m_max_pos1 = -1;
    m_max_pos2 = -1;
    m_match_infos.clear();

    if (buff1 == NULL || len1 <= 0 ||
        buff2 == NULL || len2 <= 0)
    {
        return;
    }

    // Allocate the len1 x len2 memo table; -1 marks "not computed yet".
    m_match_arr = new int*[len1];
    for (int i = 0; i < len1; ++i)
    {
        m_match_arr[i] = new int[len2];
        for (int j = 0; j < len2; ++j)
        {
            m_match_arr[i][j] = -1;
        }
    }

    // Evaluate every cell and remember the first position (row-major order)
    // at which the largest count is reached.
    for (int i = 0; i < len1; ++i)
    {
        for (int j = 0; j < len2; ++j)
        {
            const int curr = this->match(buff1, i, buff2, j);
            if (curr > m_match_num)
            {
                m_match_num = curr;
                m_max_pos1 = i;
                m_max_pos2 = j;
            }
        }
    }

    print(len1, len2);
    std::cout << "(" << m_max_pos1 << "," << m_max_pos2 << ") = " << m_match_num << std::endl;

    // The memo table must still be alive here: back-tracking reads it.
    if (m_max_pos1 != -1 && m_max_pos2 != -1)
    {
        this->back_track_exec(buff1, m_max_pos1, buff2, m_max_pos2, m_match_num);
    }

    // Matches were appended from the best position towards the start, so walk
    // the vector backwards to print them in buffer order. The index is a
    // signed int declared here (the original relied on a leaked loop variable).
    for (int k = static_cast<int>(m_match_infos.size()) - 1; k >= 0; --k)
    {
        std::cout << "pos1 = " << m_match_infos[k].pos1 << ", pos2 = " << m_match_infos[k].pos2 << ", is_en = " << m_match_infos[k].is_en << std::endl;
    }

    // Release the memo table and clear the pointer so it cannot dangle.
    for (int i = 0; i < len1; ++i)
    {
        delete [] m_match_arr[i];
    }
    delete [] m_match_arr;
    m_match_arr = NULL;
}
int CommonCharSearcher::get_max_match_num()
{
return this->m_match_num;
}
const std::vector<MatchInfo>& CommonCharSearcher::get_match_info()
{
return this->m_match_infos;
}
int CommonCharSearcher::match(const char* buff1, int i, const char* buff2, int j)
{
if (buff1 == NULL || i < 0 ||
buff2 == NULL || j < 0)
{
return 0;
}
// 已经计算过
if (m_match_arr[i][j] != -1)
{
return m_match_arr[i][j];
}
int type1 = this->get_word_type(buff1, i);
int type2 = this->get_word_type(buff2, j);
if (type1 == CommonCharSearcher::UNKNOWN)
{
m_match_arr[i][j] = this->match(buff1, i - 1, buff2, j);
return m_match_arr[i][j];
}
if (type2 == CommonCharSearcher::UNKNOWN)
{
m_match_arr[i][j] = this->match(buff1, i, buff2, j - 1);
return m_match_arr[i][j];
}
int max = 0;
int max1 = 0;
int max2 = 0;
if (type1 == CommonCharSearcher::CHAR && type2 == CommonCharSearcher::CHAR)
{
if (buff1[i] == buff2[j])
{
max = 1 + match(buff1, i - 1, buff2, j - 1);
}
max1 = match(buff1, i, buff2, j - 1);
max2 = match(buff1, i - 1, buff2, j);
}
else if (type1 == CommonCharSearcher::TWO_CHAR && type2 == CommonCharSearcher::TWO_CHAR)
{
if (buff1[i] == buff2[j] && buff1[i-1] == buff2[j-1])
{
max = 1 + match(buff1, i - 2, buff2, j - 2);
}
max1 = match(buff1, i, buff2, j - 2);
max2 = match(buff1, i - 2, buff2, j);
}
else if (type1 == CommonCharSearcher::CHAR && type2 == CommonCharSearcher::TWO_CHAR)
{
max1 = match(buff1, i, buff2, j - 2);
max2 = match(buff1, i - 1, buff2, j);
}
else if (type1 == CommonCharSearcher::TWO_CHAR && type2 == CommonCharSearcher::CHAR)
{
max1 = match(buff1, i, buff2, j - 1);
max2 = match(buff1, i - 2, buff2, j);
}
if (max1 > max)
{
max = max1;
}
if (max2 > max)
{
max = max2;
}
m_match_arr[i][j] = max;
return m_match_arr[i][j];
}
// Writes the first len1 rows and len2 columns of the memo table to std::cout,
// one row per line, with a tab after every value.
void CommonCharSearcher::print(int len1, int len2)
{
    int row = 0;
    while (row < len1)
    {
        int col = 0;
        while (col < len2)
        {
            std::cout << m_match_arr[row][col] << "\t";
            ++col;
        }
        std::cout << std::endl;
        ++row;
    }
}
// Compares the character that STARTS at buff1[i] with the one that starts at
// buff2[j].
//
// Returns MATCH_CHAR when both are single-byte characters (high bit clear) and
// equal, MATCH_TWO_CHAR when both start with a high-bit byte and the two bytes
// at [i], [i+1] equal those at [j], [j+1], and NOT_MATCH otherwise (including
// a NULL buffer or a negative index).
//
// The high bit is tested on the value converted to unsigned char, so the
// result does not depend on whether plain char is signed on the target.
//
// NOTE: no buffer length is available here; for the two-byte case the caller
// must guarantee that i + 1 and j + 1 are valid indices.
int CommonCharSearcher::is_match_word(const char* buff1, int i, const char* buff2, int j)
{
    if (buff1 == NULL || i < 0 ||
        buff2 == NULL || j < 0)
    {
        return NOT_MATCH;
    }

    const bool high1 = (static_cast<unsigned char>(buff1[i]) & 0x80u) != 0;
    const bool high2 = (static_cast<unsigned char>(buff2[j]) & 0x80u) != 0;

    if (!high1 && !high2)
    {
        if (buff1[i] == buff2[j])
        {
            return MATCH_CHAR;
        }
    }
    else if (high1 && high2)
    {
        if (buff1[i] == buff2[j] && buff1[i + 1] == buff2[j + 1])
        {
            return MATCH_TWO_CHAR;
        }
    }
    return NOT_MATCH;
}
// Classifies the character whose LAST byte is buff[i].
//
// Returns CHAR when buff[i] has its high bit clear, TWO_CHAR when both buff[i]
// and buff[i-1] have the high bit set, and UNKNOWN otherwise (a lone high-bit
// byte, a NULL buffer, or a negative index).
//
// The high bit is tested on the value converted to unsigned char, so the
// result does not depend on whether plain char is signed on the target.
//
// NOTE: only the two bytes at i-1 and i are inspected; the function does not
// verify that i actually falls on a character boundary.
int CommonCharSearcher::get_word_type(const char* buff, int i)
{
    if (buff == NULL || i < 0)
    {
        return CommonCharSearcher::UNKNOWN;
    }

    if ((static_cast<unsigned char>(buff[i]) & 0x80u) == 0)
    {
        return CommonCharSearcher::CHAR;
    }

    if (i >= 1 && (static_cast<unsigned char>(buff[i - 1]) & 0x80u) != 0)
    {
        return CommonCharSearcher::TWO_CHAR;
    }
    return CommonCharSearcher::UNKNOWN;
}
void CommonCharSearcher::back_track_exec(const char* buff1, int i, const char* buff2, int j, int match_num)
{
if (i < 0 || j < 0 || match_num <= 0)
{
return;
}
int type1 = this->get_word_type(buff1, i);
int type2 = this->get_word_type(buff2, j);
if (type1 == CommonCharSearcher::CHAR && type2 == CommonCharSearcher::CHAR)
{
if (buff1[i] == buff2[j])
{
MatchInfo match_info;
match_info.is_en = true;
match_info.pos1 = i;
match_info.pos2 = j;
m_match_infos.push_back(match_info);
if (m_match_infos.size() == m_match_num)
{
return;
}
back_track_exec(buff1, i - 1, buff2, j - 1, match_num - 1);
}
else
{
back_track_exec(buff1, i, buff2, j - 1, match_num);
if (m_match_infos.size() == m_match_num)
{
return;
}
back_track_exec(buff1, i - 1, buff2, j, match_num);
}
}
else if (type1 == CommonCharSearcher::TWO_CHAR && type2 == CommonCharSearcher::TWO_CHAR)
{
if (buff1[i] == buff2[j] && buff1[i-1] == buff2[j-1])
{
MatchInfo match_info;
match_info.is_en = false;
match_info.pos1 = i-1;
match_info.pos2 = j-1;
m_match_infos.push_back(match_info);
if (m_match_infos.size() == m_match_num)
{
return;
}
back_track_exec(buff1, i - 2, buff2, j - 2, match_num - 1);
}
else
{
back_track_exec(buff1, i, buff2, j - 2, match_num);
if (m_match_infos.size() == m_match_num)
{
return;
}
back_track_exec(buff1, i - 2, buff2, j, match_num);
}
}
else if (type1 == CommonCharSearcher::CHAR && type2 == CommonCharSearcher::TWO_CHAR)
{
back_track_exec(buff1, i, buff2, j - 2, match_num);
if (m_match_infos.size() == m_match_num)
{
return;
}
back_track_exec(buff1, i - 1, buff2, j, match_num);
}
else if (type1 == CommonCharSearcher::TWO_CHAR && type2 == CommonCharSearcher::CHAR)
{
back_track_exec(buff1, i, buff2, j - 1, match_num);
if (m_match_infos.size() == m_match_num)
{
return;
}
back_track_exec(buff1, i - 2, buff2, j, match_num);
}
}
注意:笔者的问题场景需要支持中文,全角按半角处理。如果仅仅需要支持英文字符串,代码会简洁很多。