Copyright (C) Huawei Technologies Co., Ltd. 2012-2020. All rights reserved.
*********************************************************************************************************************
FILE NAME : "translate.cpp"
DESCRIPTION : code check. TRANSLATE function.
HISTORY :
AUTHOR DATE VERSION MODIFICATION
sundianwei(43541) 2012-07-25 C-CHECK V1.0 first generate
*********************************************************************************************************************/
#include "translate.hpp"
/********************************************************************************************************************/
bool Translate(const CTLInputParam& tlInput, CTLResult& tlResult)
{
/**************************************************************************
请在此实现语言翻译算法
本题目注意点:
1. 性能设计
**************************************************************************/
return true;
}
int main()
{
CTLInputParam tlInput;
CFile inputFile("input.txt");
if (!inputFile.ReadInput(tlInput))
{
printf("invalid input, terminate translate!\r\n");
return -1;
}
CTLResult tlResult;
if (!Translate(tlInput, tlResult))
{
printf("translate failed!\r\n");
return -1;
}
CFile outputFile("output.txt");
if (!outputFile.WriteOutput(tlResult))
{
printf("output result failed!\r\n");
return -1;
}
return 0;
}
/********************************************************************************************************************
Copyright (C) Huawei Technologies Co., Ltd. 2012-2020. All rights reserved.
*********************************************************************************************************************
FILE NAME : "translate.hpp"
DESCRIPTION : code check. TRANSLATE function.
HISTORY :
AUTHOR DATE VERSION MODIFICATION
sundianwei(43541) 2012-07-25 C-CHECK V1.0 first generate
*********************************************************************************************************************/
#ifndef __TRANSLATE_HPP__
#define __TRANSLATE_HPP__
#include "file.hpp"
/********************************************************************************************************************/
// Input of the translator: the abbreviation dictionary plus the source text to translate.
// The object owns every buffer it exposes through its public members and releases them
// in its destructor.
class CTLInputParam : public CInputParam
{
private:
    // Input limits. NOTE: preprocessor macros ignore class scope and access specifiers,
    // so these names are visible to every file that includes this header.
#define MAX_BREVIARY_NUM 10000
#define MAX_BREVIARY_LEN 10
#define MAX_ORIGINAL_LEN 255
#define MAX_TEXT_LEN 1000000

public:
    CTLInputParam()
        : m_breviaryNum(0),
          m_resourceTextLen(0),
          m_breviaryWords(NULL),
          m_originalWords(NULL),
          m_resourceText(NULL)
    {
    }

    virtual ~CTLInputParam()
    {
        Release();
    }

public:
    // Parses the input stream:
    //   line 1         : number of dictionary entries N (1..MAX_BREVIARY_NUM)
    //   next N entries : "<abbreviation> <original>" separated by whitespace
    //   following lines: the source text, read up to MAX_TEXT_LEN bytes
    // Data loaded by an earlier call is released first.
    // Returns false (after printing a diagnostic) on a NULL stream, a read failure or
    // invalid data; the object then holds whatever was read so far and stays destructible.
    virtual bool Input(FILE* fp)
    {
        if (NULL == fp)
        {
            return false;
        }
        Release();

        // Read into a local first: the member must only become non-zero once the arrays
        // it describes exist, otherwise cleanup would index a NULL array.
        unsigned int breviaryNum = 0;
        if (1 != fscanf(fp, "%u", &breviaryNum))
        {
            printf("CTLInputParam::Input: fscanf breviaryNum failed.\r\n");
            return false;
        }
        if ((breviaryNum > MAX_BREVIARY_NUM) || (0 == breviaryNum))
        {
            printf("CTLInputParam::Input: fscanf breviaryNum is invalid %u.\r\n", breviaryNum);
            return false;
        }

        m_breviaryWords = new char*[breviaryNum];
        memset(m_breviaryWords, 0, sizeof(char*) * breviaryNum);
        m_originalWords = new char*[breviaryNum];
        memset(m_originalWords, 0, sizeof(char*) * breviaryNum);
        m_breviaryNum = breviaryNum;

        // Scratch buffer with room for one character more than the longest legal word,
        // so an over-long word is detected instead of overflowing the destination.
        char token[MAX_ORIGINAL_LEN + 2];
        for (unsigned int i = 0; i < m_breviaryNum; ++i)
        {
            m_breviaryWords[i] = new char[MAX_BREVIARY_LEN + 1];
            m_originalWords[i] = new char[MAX_ORIGINAL_LEN + 1];
            memset(m_breviaryWords[i], 0, MAX_BREVIARY_LEN + 1);
            memset(m_originalWords[i], 0, MAX_ORIGINAL_LEN + 1);

            if (!ReadToken(fp, token, MAX_BREVIARY_LEN + 1))
            {
                printf("CTLInputParam::Input: fscanf the %u\'th breviary word failed.\r\n", i + 1);
                return false;
            }
            if (!CheckBreviary(token))
            {
                printf("CTLInputParam::Input: invalid breviary word %s at the %u\'th line.\r\n", token, i + 1);
                return false;
            }
            memcpy(m_breviaryWords[i], token, strlen(token) + 1);

            if (!ReadToken(fp, token, MAX_ORIGINAL_LEN + 1))
            {
                printf("CTLInputParam::Input: fscanf the %u\'th original word failed.\r\n", i + 1);
                return false;
            }
            if (!CheckOriginal(token))
            {
                printf("CTLInputParam::Input: invalid original word %s at the %u\'th line.\r\n", token, i + 1);
                return false;
            }
            memcpy(m_originalWords[i], token, strlen(token) + 1);
        }

        // Skip the rest of the last dictionary line (handles "\n" as well as "\r\n").
        if (!SkipLine(fp))
        {
            printf("CTLInputParam::Input: jump for source text failed.\r\n");
            return false;
        }

        // Everything that follows is the source text.
        m_resourceText = new char[MAX_TEXT_LEN + 1];
        m_resourceTextLen = static_cast<unsigned int>(fread(m_resourceText, sizeof(char), MAX_TEXT_LEN, fp));
        if (0 == m_resourceTextLen)
        {
            printf("CTLInputParam::Input: invalid source text len %u.\r\n", m_resourceTextLen);
            return false;
        }
        m_resourceText[m_resourceTextLen] = '\0';
        return true;
    }

private:
    // The object owns raw buffers; a member-wise copy would delete them twice.
    // Declared but intentionally not defined.
    CTLInputParam(const CTLInputParam&);
    CTLInputParam& operator=(const CTLInputParam&);

    // Frees every owned buffer and returns the object to its freshly constructed state.
    void Release()
    {
        if (NULL != m_breviaryWords)
        {
            for (unsigned int i = 0; i < m_breviaryNum; ++i)
            {
                delete[] m_breviaryWords[i];
            }
            delete[] m_breviaryWords;
            m_breviaryWords = NULL;
        }
        if (NULL != m_originalWords)
        {
            for (unsigned int i = 0; i < m_breviaryNum; ++i)
            {
                delete[] m_originalWords[i];
            }
            delete[] m_originalWords;
            m_originalWords = NULL;
        }
        delete[] m_resourceText;
        m_resourceText = NULL;
        m_breviaryNum = 0;
        m_resourceTextLen = 0;
    }

    // Reads one whitespace-delimited word of at most maxChars characters into token,
    // which must provide room for maxChars + 1 bytes. Returns false when no word was read.
    static bool ReadToken(FILE* fp, char* token, unsigned int maxChars)
    {
        char format[16];
        snprintf(format, sizeof(format), "%%%us", maxChars);
        return (1 == fscanf(fp, format, token));
    }

    // Consumes the stream up to and including the next '\n'.
    // Returns false when the stream ends before a line break is found.
    static bool SkipLine(FILE* fp)
    {
        int ch = fgetc(fp);
        while ((EOF != ch) && ('\n' != ch))
        {
            ch = fgetc(fp);
        }
        return (EOF != ch);
    }

    // An abbreviation is valid when it has at most MAX_BREVIARY_LEN characters,
    // all of them in 'A'..'Z'.
    static bool CheckBreviary(const char* breviaryWord)
    {
        unsigned int len = 0;
        for (; '\0' != *breviaryWord; ++breviaryWord)
        {
            ++len;
            if ((len > MAX_BREVIARY_LEN) || (*breviaryWord < 'A') || (*breviaryWord > 'Z'))
            {
                return false;
            }
        }
        return true;
    }

    // An original word is valid when it has at most MAX_ORIGINAL_LEN characters.
    static bool CheckOriginal(const char* originalWord)
    {
        return (strlen(originalWord) <= MAX_ORIGINAL_LEN);
    }

public:
    unsigned int m_breviaryNum;      // number of dictionary entries in both word arrays
    unsigned int m_resourceTextLen;  // length of m_resourceText in bytes, without the NUL
    char** m_breviaryWords;          // abbreviations, NUL-terminated
    char** m_originalWords;          // translations, same index as m_breviaryWords
    char* m_resourceText;            // source text, NUL-terminated
};
// Result of the translator: the translated text and its length.
// m_translateText is expected to be allocated with new[]; the destructor releases it
// with delete[].
class CTLResult : public CResult
{
public:
    CTLResult() : m_translateTextLen(0), m_translateText(NULL)
    {
    }

    virtual ~CTLResult()
    {
        delete[] m_translateText;
        m_translateText = NULL;
        m_translateTextLen = 0;
    }

public:
    // Writes the translated text to fp.
    // Returns true when there is nothing to write or every byte was written;
    // false on a NULL stream, a missing buffer despite a non-zero length, or a short write.
    virtual bool OutPut(FILE* fp)
    {
        if (NULL == fp)
        {
            return false;
        }
        if (0 == m_translateTextLen)
        {
            return true;
        }
        if (NULL == m_translateText)
        {
            return false;
        }
        const size_t written = fwrite(m_translateText, sizeof(char), m_translateTextLen, fp);
        return (written == m_translateTextLen);
    }

private:
    // The object owns m_translateText; a member-wise copy would delete it twice.
    // Declared but intentionally not defined.
    CTLResult(const CTLResult&);
    CTLResult& operator=(const CTLResult&);

public:
    unsigned int m_translateTextLen;  // number of bytes in m_translateText to write
    char* m_translateText;            // translated text, owned by this object
};
#endif
/********************************************************************************************************************
Copyright (C) Huawei Technologies Co., Ltd. 2012-2020. All rights reserved.
*********************************************************************************************************************
FILE NAME : "file.hpp"
DESCRIPTION : read input parameters and write result into result file.
HISTORY :
AUTHOR DATE VERSION MODIFICATION
sundianwei(43541) 2012-07-25 C-CHECK V1.0 first generate
*********************************************************************************************************************/
#ifndef __FILE_HPP__
#define __FILE_HPP__
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
/********************************************************************************************************************/
// Abstract interface for anything that can load itself from an open stream.
class CInputParam
{
public:
    CInputParam() {}
    virtual ~CInputParam() {}

    // Fills the object from fp; implementations report success through the return value.
    virtual bool Input(FILE* fp) = 0;
};
// Abstract interface for anything that can store itself into an open stream.
class CResult
{
public:
    CResult() {}
    virtual ~CResult() {}

    // Emits the object to fp; implementations report success through the return value.
    virtual bool OutPut(FILE* fp) = 0;
};
// Binds a file name to the two operations the program needs: load an input object from
// the file, or store a result object into it. The name is copied, so the caller's string
// does not have to outlive the CFile.
class CFile
{
public:
    // name: path of the file; a NULL pointer is treated as an empty name, which makes
    // the later open attempt fail instead of crashing here.
    CFile(const char* name) : m_pFileName(NULL)
    {
        const char* source = (NULL != name) ? name : "";
        const size_t len = strlen(source);
        m_pFileName = new char[len + 1];
        memcpy(m_pFileName, source, len + 1);
    }

    virtual ~CFile()
    {
        delete[] m_pFileName;
        m_pFileName = NULL;
    }

public:
    // Opens the file read-only and lets inputParam parse it.
    // Returns false when the file cannot be opened or parsing fails.
    bool ReadInput(CInputParam& inputParam)
    {
        FILE* fp = fopen(m_pFileName, "r");
        if (NULL == fp)
        {
            printf("CFile::ReadInput: open file %s failed.\n", m_pFileName);
            return false;
        }
        const bool ret = inputParam.Input(fp);
        fclose(fp);
        return ret;
    }

    // Creates or truncates the file and lets result write itself into it.
    // Returns false when the file cannot be opened, writing fails, or closing the file
    // fails (buffered data may not have reached the file in that case).
    bool WriteOutput(CResult& result)
    {
        FILE* fp = fopen(m_pFileName, "w");
        if (NULL == fp)
        {
            printf("CFile::WriteOutput: open file %s failed.\n", m_pFileName);
            return false;
        }
        const bool written = result.OutPut(fp);
        const bool closed = (0 == fclose(fp));
        return written && closed;
    }

private:
    // The object owns m_pFileName; a member-wise copy would delete it twice.
    // Declared but intentionally not defined.
    CFile(const CFile&);
    CFile& operator=(const CFile&);

private:
    char* m_pFileName;  // owned, NUL-terminated copy of the file name
};
#endif
语言翻译机
----2006年百度之星程序设计大赛初赛题目
公司的工程师们是非常注重效率的,在长期的开发与测试过程中,他们逐渐创造了一套独特的缩略语。他们在平时的交谈,会议,甚至在各种技术文档中都会大量运用。
为了让新员工可以更快地适应公司的文化,更好地阅读公司的技术文档,人力资源部决定开发一套专用的翻译系统,把相关文档中的缩略语和专有名词翻译成日常语言。
输入:
输入数据包含三部分
1. 第一行包含一个整数N(N<=10000),表示总共有多少个缩略语的词条。
2. 紧接着有 N 行的输入,每行包含两个字符串,以空格隔开。第一个字符串为缩略语(仅包含大写英文字符,长度不超过10),第二个字符串为日常语言(不包含空格,长度不超过255)。
3. 从第N+2行开始到输入结束为包含缩略语的相关文档。(总长度不超过1000000个字符)
输出数据:
输出将缩略语转换成日常语言的文档。(将缩略语转换成日常语言,其他字符保留原样),最大保留目标文档为10000000个字符。
输入样例:
6
PS 门户搜索部
NLP 自然语言处理
PM 产品市场部
HR 人力资源部
PMD 产品推广部
MD 市场发展部
百度的部门包括PS,PM,HR,PMD,MD等等,其中PS还包括NLP小组。
输出样例:
百度的部门包括门户搜索部,产品市场部,人力资源部,产品推广部,市场发展部等等,其中门户搜索部还包括自然语言处理小组。
注意:
1. 输入数据中是中英文、阿拉伯数字混合的,包含空格,逗号,回车换行等符号,中文采用GBK编码。
2. 为保证答案的唯一性,缩略语的转换采用正向最大匹配(从左到右为正方向)的原则。请注意输入例子中 PMD 的翻译。
如:
AB 市场
BCD 研发
源文档:2012年,ABCD部门的经营指标是2亿dollar。
翻译后:2012年,市场CD部门的经营指标是2亿dollar。
自A开始最大匹配,并不需要纯粹的最长匹配("AB"长度小于"BCD"长度)。
要求:
1. 代码符合编程规范
2. 代码结构划分合理
3. 功能正确
4. 性能表现良好
参考信息:
中文汉字及符号的编码,请参见附件《GBK_百度百科.mht》