freeictclas java_freeICTCLAS中科院中文分词(拼音中文输入法设计和源代码)

//

//ICTCLAS简介:计算所汉语词法分析系统ICTCLAS(Institute of Computing Technology, Chinese Lexical Analysis System),

// 功能有:中文分词;词性标注;未登录词识别。

// 分词正确率高达97.58%(973专家评测结果),

// 未登录词识别召回率均高于90%,其中中国人名的识别召回率接近98%;

// 处理速度为31.5Kbytes/s。

//著作权: Copyright © 2002-2005 中科院计算所 职务著作权人:张华平 刘群

//遵循协议:自然语言处理开放资源许可证1.0

//Email: zhanghp@链接已屏蔽

//Homepage:链接已屏蔽;链接已屏蔽

/****************************************************************************

*

* Copyright (c) 2000, 2001

* Machine Group

* Software Research Lab.

* Institute of Computing Tech.

* Chinese Academy of Sciences

* All rights reserved.

*

* This file is the confidential and proprietary property of

* Institute of Computing Tech. and the possession or use of this file requires

* a written license from the author.

* Filename: Span.h:

* Abstract:

* interface for the CSpan class.

* Author: Kevin Zhang

* (zhanghp@链接已屏蔽)

* Date: 2002-4-23

*

* Notes: Tagging with Hidden Markov Model

*

****************************************************************************/

#if !defined(AFX_SPAN_H__178113DA_8D45_4D47_B6DA_CB62C001BC35__INCLUDED_)

#define AFX_SPAN_H__178113DA_8D45_4D47_B6DA_CB62C001BC35__INCLUDED_

#if _MSC_VER > 1000

#pragma once

#endif // _MSC_VER > 1000

#include "..\\Utility\\Dictionary.h"

#include "..\\Utility\\ContextStat.h"

#include "..\\Segment\\DynamicArray.h"

// Capacity limits for the fixed-size, per-sentence buffers declared in CSpan.

// First dimension of CSpan's per-word arrays (m_nBestTag, m_sWords,
// m_nWordPosition, m_nTags, m_nBestPrev, m_dFrequency).
#define MAX_WORDS_PER_SENTENCE 120

// Number of entries in CSpan::m_nUnknownWords and CSpan::m_dWordsPossibility.
#define MAX_UNKNOWN_PER_SENTENCE 200

// Second dimension of CSpan::m_nTags, m_nBestPrev and m_dFrequency.
#define MAX_POS_PER_WORD 20

// Not referenced anywhere in the visible part of this header.
// NOTE(review): presumably a low-frequency threshold used by the implementation -- confirm.
#define LITTLE_FREQUENCY 6

// Tagging mode selected through CSpan::SetTagType() (TT_NORMAL is its default
// argument). Numeric values are spelled out explicitly; they are the same
// values the compiler assigns implicitly (0, 1, 2, 3).
// NOTE(review): the per-mode descriptions below are inferred from the
// enumerator names and should be confirmed against the implementation.
enum TAG_TYPE
{
    TT_NORMAL       = 0,  // presumably ordinary part-of-speech tagging
    TT_PERSON       = 1,  // presumably person-name tagging
    TT_PLACE        = 2,  // presumably place-name tagging
    TT_TRANS_PERSON = 3   // presumably transliterated person-name tagging
};

// CSpan: interface of the tagger declared in Span.h. The file header describes
// its purpose as "Tagging with Hidden Markov Model". Only declarations are
// visible in this header; the implementation lives elsewhere, so notes about
// behaviour are marked as assumptions where they cannot be read off the
// declarations. Following the file's existing style, each comment is placed
// AFTER the declaration it describes.
class CSpan

{

public:

bool PlaceRecognize(CDictionary &dictCore,CDictionary &placeDict);

//Takes the core dictionary and a place dictionary.
//NOTE(review): presumably recognizes place names; meaning of the bool result is not visible here.

bool PersonRecognize(CDictionary &personDict);

//Takes a person dictionary.
//NOTE(review): presumably recognizes person names; meaning of the bool result is not visible here.

bool POSTagging(PWORD_RESULT pWordItems,CDictionary &dictCore,CDictionary &dictUnknown);

//POS tagging with Hidden Markov Model

void SetTagType(enum TAG_TYPE nType=TT_NORMAL);

//Set the tag type; defaults to TT_NORMAL when called without an argument

bool LoadContext(char *sFilename);

//Takes a file name.
//NOTE(review): presumably loads m_context from that file -- confirm in the implementation.

CSpan();//Default constructor; the original trailing note here read "CDictionary &dict"

virtual ~CSpan();

int m_nUnknownIndex;

//The number of unknown words

int m_nUnknownWords[MAX_UNKNOWN_PER_SENTENCE][2];

//The start and ending position of each unknown word
//NOTE(review): presumably [i][0] is the start and [i][1] the end -- confirm.

ELEMENT_TYPE m_dWordsPossibility[MAX_UNKNOWN_PER_SENTENCE];

//The possibility of unknown words

CContextStat m_context;//context

protected:

ELEMENT_TYPE ComputePossibility(int nStartPos,int nLength,CDictionary &dict);

//Takes a start position, a length and a dictionary; returns an ELEMENT_TYPE value.

int GetFrom(PWORD_RESULT pWordItems,int nIndex,CDictionary &dictCore,CDictionary &dictUnknown);

//Get words from the word items, start from nIndex, Function for unknown words recognition

bool GuessPOS(int nIndex,int *pSubIndex);

//NOTE(review): pSubIndex is a non-const pointer, presumably an output parameter -- confirm.

bool GetBestPOS();

bool Reset(bool bContinue=true);

//bContinue defaults to true; its effect is defined in the implementation, not in this header.

bool Disamb();

private:

enum TAG_TYPE m_tagType;//The type of tagging

int m_nStartPos;

int m_nBestTag[MAX_WORDS_PER_SENTENCE];

//Record the Best Tag

char m_sWords[MAX_WORDS_PER_SENTENCE][WORD_MAXLENGTH];

//One character buffer of WORD_MAXLENGTH bytes per word slot

int m_nWordPosition[MAX_WORDS_PER_SENTENCE];

int m_nTags[MAX_WORDS_PER_SENTENCE][MAX_POS_PER_WORD];

//Up to MAX_POS_PER_WORD entries per word slot

char m_nBestPrev[MAX_WORDS_PER_SENTENCE][MAX_POS_PER_WORD];

//NOTE(review): stored as plain char, whose signedness is implementation-defined;
//if this holds indices or negative sentinels, verify the value range on every target.

char m_nCurLength;

//NOTE(review): also plain char; if this counts words (limit MAX_WORDS_PER_SENTENCE = 120)
//it only just fits in a signed 8-bit char -- confirm before raising that limit.

double m_dFrequency[MAX_WORDS_PER_SENTENCE][MAX_POS_PER_WORD];

//Same dimensions as m_nTags

};

#endif // !defined(AFX_SPAN_H__178113DA_8D45_4D47_B6DA_CB62C001BC35__INCLUDED_)



更多源码 | 好库简介 | 网站地图 | 帮助中心 | 版权说明

Copyright© 2009-2012 OKBASE.NET All Rights Reserved 好库网 版权所有

  • 0
    点赞
  • 0
    收藏
    觉得还不错? 一键收藏
  • 0
    评论
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值