两个字符串中最大公共字符数(支持汉字)

【导读】

    最近遇到一个问题,可以抽象为:求两个字符串中最大公共字符数并输出所有公共字符。


【正文】

    给定字符串str1和str2,借鉴“两个字符串的最长公共子序列(LCS)”问题的思路,给出递推公式:

f(i, j) :  str1中[0-i] 和str2中[0-j] 的最大公共字符数(边界:当 i < 0 或 j < 0 时,f(i, j) = 0)

f(i, j) = max { 

(1). 1 + f(i - 1, j - 1)  // if (str1[i] == str2[j])

(2). f(i - 1, j)              // if (str1[i] != str2[j])

(3). f(i, j - 1)              // if (str1[i] != str2[j])

}


代码实现:

CommonCharSearcher.h

#include <vector>

// One matched character pair found by the search.
struct MatchInfo
{
    int pos1;   // byte offset of the matched character in the first buffer
    int pos2;   // byte offset of the matched character in the second buffer
    bool is_en; // true for a single-byte match, false for a two-byte match
};

// Finds the largest number of common characters between two byte buffers
// and records where each common character sits in both buffers.
// A character is either a single byte or a pair of bytes.
class CommonCharSearcher
{
public:
	// Result codes returned by is_match_word().
        enum {NOT_MATCH, MATCH_CHAR, MATCH_TWO_CHAR};
	// Byte classifications returned by get_word_type().
	enum {UNKNOWN, CHAR, TWO_CHAR};

public:
	// Creates a searcher with no result: count 0, best positions -1.
	CommonCharSearcher();

	// Runs the search over buff1[0..len1) and buff2[0..len2), stores the
	// results in this object and prints diagnostics to std::cout.
        void search_most_common_char(const char *buff1, int len1, const char* buff2, int len2);

	// Returns the stored match count (m_match_num).
	int get_max_match_num();

	// Returns the stored list of matched positions (m_match_infos).
	const std::vector<MatchInfo>& get_match_info();

private:
	// Memoized recurrence: match count for the prefixes ending at
	// buff1[i] and buff2[j]; returns 0 when either index is negative.
	int match(const char* buff1, int i, const char* buff2, int j);

	// Compares the character starting at buff1[i] with the one starting at
	// buff2[j] and returns NOT_MATCH, MATCH_CHAR or MATCH_TWO_CHAR.
	// Not called anywhere in the visible code.
	int is_match_word(const char* buff1, int i, const char* buff2, int j);

	// Classifies the byte buff[i] as UNKNOWN, CHAR or TWO_CHAR.
	int get_word_type(const char* buff, int i);

	void print(int len1, int len2); // prints the memo table to std::cout

	void back_track_exec(const char* buff1, int i, const char* buff2, int j, int match_num); // walks back from the best cell and appends the matches to m_match_infos

private:
	// Best match count found by the last search.
        int m_match_num;
	// Indices (i, j) at which m_match_num was first reached; -1 when none.
	int m_max_pos1;
	int m_max_pos2;

	// Only ever set to NULL by the constructor in the visible code.
	char *m_buff1;
	char *m_buff2;

	// len1 x len2 memo table; -1 marks a cell that is not computed yet.
	// Allocated and released inside search_most_common_char().
	int **m_match_arr;

	// Matches appended by back_track_exec().
        std::vector<MatchInfo> m_match_infos;

};

CommonCharSearcher.cpp

#include "stdafx.h"
#include "CommonCharSearcher.h"
#include <iostream>

// Builds an idle searcher: zero matches, no best position (-1, -1) and
// all pointer members null.
CommonCharSearcher::CommonCharSearcher()
	: m_match_num(0),
	  m_max_pos1(-1),
	  m_max_pos2(-1),
	  m_buff1(NULL),
	  m_buff2(NULL),
	  m_match_arr(NULL)
{
}

void CommonCharSearcher::search_most_common_char(const char *buff1, int len1, const char* buff2, int len2)
{
	if (buff1 == NULL || len1 <= 0 ||
		buff2 == NULL || len2 <= 0)
	{
		return;
	}

	m_match_arr = new int*[len1];
	for (int i = 0; i < len1; ++i)
	{
		m_match_arr[i] = new int[len2];
		for (int j = 0; j < len2; ++j)
		{
			m_match_arr[i][j] = -1;
		}
	}

	for (int i = 0; i < len1; ++i)
	{
		for (int j = 0; j < len2; ++j)
		{
			int curr = this->match(buff1, i, buff2, j);
			if (curr > m_match_num)
			{
				m_match_num = curr;
				m_max_pos1 = i;
				m_max_pos2 = j;
			}
		}
	}

	
	print(len1, len2);
	std::cout << "(" << m_max_pos1 << "," << m_max_pos2 << ") = " << m_match_num << std::endl;
	
	if (m_max_pos1 != -1 && m_max_pos2 != -1)
	{
		this->back_track_exec(buff1, m_max_pos1, buff2, m_max_pos2, m_match_num);
	}

	
	for (i = m_match_infos.size() - 1; i >= 0 ; --i)
	{
		std::cout << "pos1 = " << m_match_infos[i].pos1 << ", pos2 = " << m_match_infos[i].pos2 << ", is_en = " << m_match_infos[i].is_en << std::endl;
	}

	// 释放资源
	for (int i = 0; i < len1; ++i)
	{
		delete [] m_match_arr[i];
	}
	delete [] m_match_arr;

	return;
}


int CommonCharSearcher::get_max_match_num()
{
	return this->m_match_num;
}

const std::vector<MatchInfo>& CommonCharSearcher::get_match_info()
{
	return this->m_match_infos;
}

int CommonCharSearcher::match(const char* buff1, int i, const char* buff2, int j)
{
	if (buff1 == NULL || i < 0 ||
		buff2 == NULL || j < 0)
	{
		return 0;
	}

	// 已经计算过
	if (m_match_arr[i][j] != -1)
	{
		return m_match_arr[i][j];
	}

	int type1 = this->get_word_type(buff1, i);
	int type2 = this->get_word_type(buff2, j);
	if (type1 == CommonCharSearcher::UNKNOWN)
	{
		m_match_arr[i][j] = this->match(buff1, i - 1, buff2, j);
		return m_match_arr[i][j];
	}

	if (type2 == CommonCharSearcher::UNKNOWN)
	{
		m_match_arr[i][j] = this->match(buff1, i, buff2, j - 1);
		return m_match_arr[i][j];
	}

	int max = 0;
	int max1 = 0;
	int max2 = 0;
	
	if (type1 == CommonCharSearcher::CHAR && type2 == CommonCharSearcher::CHAR)
	{
		if (buff1[i] == buff2[j])
		{
			max = 1 + match(buff1, i - 1, buff2, j - 1);
		}
		
		max1 = match(buff1, i, buff2, j - 1);
		max2 = match(buff1, i - 1, buff2, j);
	}
	else if (type1 == CommonCharSearcher::TWO_CHAR && type2 == CommonCharSearcher::TWO_CHAR)
	{
		if (buff1[i] == buff2[j] && buff1[i-1] == buff2[j-1])
		{
			max = 1 + match(buff1, i - 2, buff2, j - 2);
		}
		
		max1 = match(buff1, i, buff2, j - 2);
		max2 = match(buff1, i - 2, buff2, j);
	}
	else if (type1 == CommonCharSearcher::CHAR && type2 == CommonCharSearcher::TWO_CHAR)
	{
		max1 = match(buff1, i, buff2, j - 2);
		max2 = match(buff1, i - 1, buff2, j);
	}
	else if (type1 == CommonCharSearcher::TWO_CHAR && type2 == CommonCharSearcher::CHAR)
	{
		max1 = match(buff1, i, buff2, j - 1);
		max2 = match(buff1, i - 2, buff2, j);
	}

	if (max1 > max)
	{
		max = max1;
	}
	if (max2 > max)
	{
		max = max2;
	}

	m_match_arr[i][j] = max;
	return m_match_arr[i][j];
}

// Writes the len1 x len2 memo table to std::cout: one line per row, every
// value followed by a tab.
void CommonCharSearcher::print(int len1, int len2)
{
	for (int row = 0; row < len1; ++row)
	{
		const int* cells = m_match_arr[row];

		int col = 0;
		while (col < len2)
		{
			std::cout << cells[col] << "\t";
			++col;
		}

		std::cout << std::endl;
	}
}

/**
 * Compares the character that starts at buff1[i] with the one that starts
 * at buff2[j].
 *
 * @return MATCH_CHAR when both bytes are non-negative and equal,
 *         MATCH_TWO_CHAR when both bytes are negative and equal and the
 *         following bytes are equal too, NOT_MATCH otherwise (including a
 *         NULL buffer or a negative index).
 *
 * NOTE(review): the two-byte case reads buff1[i + 1] and buff2[j + 1]; no
 * length is available here, so the caller has to guarantee those bytes
 * exist.
 */
int CommonCharSearcher::is_match_word(const char* buff1, int i, const char* buff2, int j)
{
	if (buff1 == NULL || buff2 == NULL)
	{
		return NOT_MATCH;
	}
	if (i < 0 || j < 0)
	{
		return NOT_MATCH;
	}

	const char first1 = buff1[i];
	const char first2 = buff2[j];

	// Different first bytes can never match, whatever their kind is.
	if (first1 != first2)
	{
		return NOT_MATCH;
	}

	// Equal bytes share their sign, so one test decides the kind.
	if (first1 >= 0)
	{
		return MATCH_CHAR;
	}

	return (buff1[i + 1] == buff2[j + 1]) ? MATCH_TWO_CHAR : NOT_MATCH;
}

/**
 * Classifies the byte buff[i].
 *
 * @param buff buffer to inspect; bytes buff[0..i] must be readable
 * @param i    index of the byte, expected to be >= 0
 * @return CHAR     when buff[i] is a single-byte character (high bit clear),
 *         TWO_CHAR when buff[i] is the second byte of a two-byte character,
 *         UNKNOWN  when buff[i] is the first byte of a two-byte character
 *                  (or a high byte that has no partner).
 *
 * Assumes an encoding in which both bytes of a two-byte character have the
 * high bit set (e.g. GB2312 / EUC-CN). NOTE(review): encodings whose second
 * byte may be below 0x80 (GBK, Big5) are not handled — confirm the input
 * encoding with the caller.
 */
int CommonCharSearcher::get_word_type(const char* buff, int i)
{
	// Test the high bit explicitly: whether plain char is signed is
	// implementation-defined, so "buff[i] >= 0" is not portable.
	if ((static_cast<unsigned char>(buff[i]) & 0x80) == 0)
	{
		return CommonCharSearcher::CHAR;
	}

	// Count the run of high bytes that ends at i. The run is made of whole
	// two-byte characters, so the position inside the run tells which half
	// buff[i] is. Looking at buff[i - 1] alone is not enough: in back-to-back
	// two-byte characters the first byte of the next character also follows
	// a high byte and would be paired with a byte of the previous character.
	int run = 0;
	for (int k = i; k >= 0 && (static_cast<unsigned char>(buff[k]) & 0x80) != 0; --k)
	{
		++run;
	}

	if (run % 2 == 0)
	{
		return CommonCharSearcher::TWO_CHAR;
	}

	return CommonCharSearcher::UNKNOWN;
}

/**
 * Walks back from cell (i, j) of the memo table and appends one MatchInfo
 * to m_match_infos for every matched character, last match first.
 *
 * @param buff1     first buffer
 * @param i         index in buff1 to start from
 * @param buff2     second buffer
 * @param j         index in buff2 to start from
 * @param match_num number of matches still to be collected
 *
 * Must be called while m_match_arr is allocated, because it consults the
 * memo table through match().
 *
 * The walk follows the memo table instead of trying both neighbours of a
 * mismatching cell. Trying both without undoing the pushes of a branch that
 * fell short could record the same position twice or out of order, and
 * could take exponential time.
 */
void CommonCharSearcher::back_track_exec(const char* buff1, int i, const char* buff2, int j, int match_num)
{
	while (i >= 0 && j >= 0 && match_num > 0)
	{
		// Unclassifiable bytes are skipped exactly the way match() skips them.
		const int type1 = this->get_word_type(buff1, i);
		if (type1 == CommonCharSearcher::UNKNOWN)
		{
			--i;
			continue;
		}

		const int type2 = this->get_word_type(buff2, j);
		if (type2 == CommonCharSearcher::UNKNOWN)
		{
			--j;
			continue;
		}

		const int step1 = (type1 == CommonCharSearcher::TWO_CHAR) ? 2 : 1;
		const int step2 = (type2 == CommonCharSearcher::TWO_CHAR) ? 2 : 1;

		const int here = this->match(buff1, i, buff2, j);

		bool same = (type1 == type2) && (buff1[i] == buff2[j]);
		if (same && type1 == CommonCharSearcher::TWO_CHAR)
		{
			same = (buff1[i - 1] == buff2[j - 1]);
		}

		// Take the match only when it is the step that produced this cell.
		if (same && 1 + this->match(buff1, i - step1, buff2, j - step2) == here)
		{
			MatchInfo match_info;
			match_info.is_en = (type1 == CommonCharSearcher::CHAR);
			// Report the offset of the first byte of the character.
			match_info.pos1 = i - (step1 - 1);
			match_info.pos2 = j - (step2 - 1);
			m_match_infos.push_back(match_info);

			i -= step1;
			j -= step2;
			--match_num;
			continue;
		}

		// No match taken here: move to the neighbour that carries the same
		// count, so the walk never leaves an optimal path.
		if (this->match(buff1, i, buff2, j - step2) == here)
		{
			j -= step2;
		}
		else
		{
			i -= step1;
		}
	}
}

注意:笔者的问题场景需要支持中文,全角按半角处理。如果仅仅需要支持英文字符串,代码会简洁很多。


评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值