《RTFV1.7规范》中Demo解析

tianyapai

于 2019-01-14 10:42:58 发布

阅读量1k

点赞数

分类专栏： Rtf格式分析

本文链接：https://blog.csdn.net/tianyapai/article/details/86470700

版权

Rtf格式分析专栏收录该内容

4 篇文章 0 订阅

订阅专栏

对《RTF V1.7规范》中的Demo进行了小部分修改，使其可以直接编译测试，以下是代码及相应的注解。

#include "rtf.h"
#include <iostream>

// Demo driver: runs a small hard-coded RTF document through the Rtf reader
// and prints the extracted plain text to stdout.
// Returns 0 on success, 1 when the reader reports an error.
int main()
{
	// An RTF hex escape is backslash + apostrophe + two hex digits. Inside a C++
	// literal the backslash itself must be escaped ("\\'CB"); a bare "\'" is just
	// an apostrophe and would silently drop the backslash from the RTF text.
	std::string strRtf = "{\\rtf1\\ansi{\\fonttbl{\\f0\\fswiss\\fprq2\\fcharset0 Arial;}"
	     "{\\f1\\fnil\\fprq2\\fcharset134 \\'CB\\'CE\\'CC\\'E5;}}"
"\r\n{\\colortbl\\red36\\green31\\blue237;\\red0\\green0\\blue0;}\r\n\\pard\\plain\\f0\\cf0\\fs48"
		"\\pard \\fi0 \\li0 \\ri0 \\qc \\sb0 \\sb0 \\sl0 \\tx1440 {\\f1 \\b1 \\i0 \\cf1 ASDFGHJKL}\\par }";
	Rtf rtf;

	// convertRTF returns 0 (ecOK) on success; any other value is a parser error code.
	const int ec = rtf.convertRTF(strRtf);
	if (ec != 0) {
		std::cerr << "RTF conversion failed, error code " << ec << '\n';
		return 1;
	}

	std::cout << rtf.cs;
	return 0;
}

-----------------------------------------Rtf.h文件----------------------------------

#pragma once
#include <string>
#include <vector>

// Small RTF reader: convertRTF() takes RTF text held in a std::string and the
// plain text extracted from it is accumulated in the public member `cs`.
// Formatting properties (character / paragraph / section / document) are
// tracked per RTF group through a linked stack of SAVE records.
class Rtf {

#define fTrue 1
#define fFalse 0
#define RTF_Eof -1

    // RTF parser error codes
	enum EcErrorCode {
		ecOK = 0,                       // Everything's fine!
		ecStackUnderflow = 1,           // Unmatched '}'
		ecStackOverflow = 2,			// Too many '{' -- memory exhausted
		ecUnmatchedBrace = 3,			// RTF ended during an open group.
		ecInvalidHex = 4,				// invalid hex character found in data
		ecBadTable = 5,					// RTF table (sym or prop) invalid
		ecAssertion = 6,				// Assertion failure
		ecEndOfFile = 7,				// End of file reached while reading RTF
		ecInvalidRtf = 8				// input rejected as invalid RTF
	};

	// Rtf Internal State: normal text, binary data, or hex-escaped data
	enum RIS { 
		risNorm,
		risBin, 
		risHex 
	} ;  

	// Rtf Destination State: normal output, or skip the current destination
	enum RDS{ 
		rdsNorm,
		rdsSkip
	} ;  

	// CHaracter Properties
	struct CHP
	{
		char fFont;
		char fBold;
		char fItalic;
		char fColorFont;
		char fUnderline;
	} ;                 

	typedef enum { justL, justR, justC, justF } JUST;
	// PAragraph Properties
	struct PAP
	{
		int xaLeft;                 // left indent in twips
		int xaRight;                // right indent in twips
		int xaFirst;                // first line indent in twips
		JUST just;                  // justification
	} ;                  

	typedef enum { sbkNon, sbkCol, sbkEvn, sbkOdd, sbkPg } SBK;
	typedef enum { pgDec, pgURom, pgLRom, pgULtr, pgLLtr } PGN;
	// SEction Properties
	struct SEP
	{
		int cCols;                  // number of columns
		SBK sbk;                    // section break type
		int xaPgn;                  // x position of page number in twips
		int yaPgn;                  // y position of page number in twips
		PGN pgnFormat;              // how the page number is formatted
	} ;                  

	// DOcument Properties
	struct DOP
	{
		int xaPage;                 // page width in twips
		int yaPage;                 // page height in twips
		int xaLeft;                 // left margin in twips
		int yaTop;                  // top margin in twips
		int xaRight;                // right margin in twips
		int yaBottom;               // bottom margin in twips
		int pgnStart;               // starting page number
		char fFacingp;              // facing pages enabled?
		char fLandscape;            // landscape or portrait??
	} ;                  

	// Property save structure: one record per open RTF group
	struct SAVE
	{
		struct SAVE *pNext;         // next save
		CHP chp;					// character properties
		PAP pap;					// paragraph properties
		SEP sep;					// section properties
		DOP dop;					// document properties

		RDS rds;					// destination state (normal / skip)
		RIS ris;					// internal state (normal / binary / hex)
	};

	// What types of properties are there?
	// Each value identifies one property a keyword can set; it is used as the
	// index into the property description table (rgprop).
	enum IPROP{
		ipropFont, ipropBold, ipropItalic, ipropColorFont, ipropUnderline,
		ipropLeftInd, ipropRightInd, ipropFirstInd, ipropCols, ipropPgnX,
		ipropPgnY, ipropXaPage, ipropYaPage, ipropXaLeft,
		ipropXaRight, ipropYaTop, ipropYaBottom, ipropPgnStart,
		ipropSbk, ipropPgnFormat, ipropFacingp, ipropLandscape,
		ipropJust, ipropPard, ipropPlain, ipropSectd,
		ipropMax
	} ;

	enum ACTN{ actnSpec, actnByte, actnWord } ;             // special handling, byte-sized value, word-sized value
	enum PROPTYPE{ propChp, propPap, propSep, propFont, propDop } ;   // character, paragraph, section, font, document

	struct PROP
	{
		ACTN actn;													// size of value
		PROPTYPE prop;												// structure containing value
		int  offset;												// offset of value from base of structure
	};

	enum IPFN{ ipfnBin, ipfnHex, ipfnSkipDest } ;       // special keywords: \bin, \' (hex), \* (skip destination)
	enum IDEST{ idestPict, idestSkip } ;
	enum KWD{ kwdChar, kwdDest, kwdProp, kwdSpec } ;     // character, destination, property, special keyword

	// One keyword table entry: the keyword text, its default value, whether the
	// default always overrides a parsed parameter, the kind of keyword, and a
	// kind-dependent index (see idx below).
	struct SYM
	{
		char *szKeyword;										// RTF keyword
		int  dflt;												// default value to use
		bool fPassDflt;											// true to use default value from this table
		KWD  kwd;												// base action to take
		int  idx;												// index into property table if kwd == kwdProp
																// index into destination table if kwd == kwdDest
																// character to print if kwd == kwdChar
	} ;

	enum FontFlag {
		fontYes,
		fontNo
	};

	enum FontFimaly {
		fnil,
		fswiss
	};

	struct Font {
		FontFimaly fontFimaly;    // font family
		int fprq2;				  // NOTE(review): original comment said "character spacing"; the name suggests the \fprq value -- confirm
		std::string  fontname;    // font name
	};

public:
	Rtf();
	~Rtf();

	int getRtfContent();

	void ugetRtfContent();

	EcErrorCode convertRTF(const std::string &rtfContent);

	EcErrorCode parseRtf();

	EcErrorCode ecPushRtfState();

	EcErrorCode ecPopRtfState();

	//ecEndGroupAction
	// The destination specified by rds is coming to a close.
	// If there's any cleanup that needs to be done, do it now.
	/********************************************************************************************/
	EcErrorCode ecEndGroupAction(RDS rds);


	//  ecParseRtfKeyword
	// get a control word (and its associated value) and
	// call ecTranslateKeyword to dispatch the control.
	//
	/********************************************************************************************/
	EcErrorCode ecParseRtfKeyword();

	// ecTranslateKeyword.
	// Search rgsymRtf for szKeyword and evaluate it appropriately.
	// Inputs:
	// szKeyword:   The RTF control to evaluate.
	// param:       The parameter of the RTF control.
	// fParam:      fTrue if the control had a parameter; (that is, if param is valid)
	//              fFalse if it did not.
	/********************************************************************************************/
	EcErrorCode ecTranslateKeyword(char *szKeyword, int param, bool fParam);

	// ecApplyPropChange
	// Set the property identified by _iprop_ to the value _val_.
	/********************************************************************************************/
	EcErrorCode ecApplyPropChange(IPROP iprop, int val);

	EcErrorCode ecParseChar(int ch);

	// ecChangeDest
	// Change to the destination specified by idest.
	// There's usually more to do here than this...
	/********************************************************************************************/
	EcErrorCode ecChangeDest(IDEST idest);


	// ecPrintChar
	// Send a character to the output 
	/********************************************************************************************/
	EcErrorCode ecPrintChar(int ch);

	// ecParseSpecialProperty
	// Set a property that requires code to evaluate.
	/********************************************************************************************/
	Rtf::EcErrorCode ecParseSpecialProperty(IPROP iprop, int val);


	// ecParseSpecialKeyword
	// Evaluate an RTF control that needs special processing.
	/********************************************************************************************/
	EcErrorCode ecParseSpecialKeyword(IPFN ipfn);

private:
	std::string mRtfContent = "";
	size_t mCurrentPos = 0;
	size_t mSize = 0;

	SAVE *psave = 0;

	// Every state member below is value-initialised so that nothing is read
	// before it has been written (fSkipDestIfUnk in particular is tested on
	// the very first keyword).
	int cGroup = 0;
	CHP chp = {};			// character properties
	PAP pap = {};			// paragraph properties
	SEP sep = {};			// section properties
	DOP dop = {};			// document properties
	RDS rds = rdsNorm;		// whether the current group is being skipped
	RIS ris = risNorm;		// whether content is normal text, binary, or hex

	// State shared between the main reader loop and the keyword parser
	bool fSkipDestIfUnk = false;   // skip destination if unknown
	long cbBin = 0;                // remaining byte count while in binary (\bin) mode
	long lParam = 0;               // numeric parameter of the most recent keyword

	int isymMax = 0;
	std::vector<SYM> rgsymRtf;

	// Property descriptions, indexed by IPROP
	std::vector<PROP> rgprop;//[ipropMax];

	int fontgroup = 0;
	std::vector<Font> fonts;

public:
	std::string cs;			// extracted plain text
};

-----------------------------------------Rtf.cpp文件----------------------------------

#include "Rtf.h"

#include <cctype>
#include <climits>
#include <cstddef>
#include <cstdlib>
#include <cstring>

// Builds the two lookup tables used by the parser:
//   rgprop   - one PROP description per IPROP value, in IPROP order;
//   rgsymRtf - the keyword table searched by ecTranslateKeyword.
Rtf::Rtf() {
	rgprop = {
		{ actnByte,   propChp,    offsetof(CHP, fFont) },       // ipropFont
		{ actnByte,   propChp,    offsetof(CHP, fBold) },       // ipropBold
		{ actnByte,   propChp,    offsetof(CHP, fItalic) },     // ipropItalic
		{ actnByte,   propChp,    offsetof(CHP, fColorFont) },  // ipropColorFont
		{ actnByte,   propChp,    offsetof(CHP, fUnderline) },  // ipropUnderline
		{ actnWord,   propPap,    offsetof(PAP, xaLeft) },      // ipropLeftInd
		{ actnWord,   propPap,    offsetof(PAP, xaRight) },     // ipropRightInd
		{ actnWord,   propPap,    offsetof(PAP, xaFirst) },     // ipropFirstInd
		{ actnWord,   propSep,    offsetof(SEP, cCols) },       // ipropCols
		{ actnWord,   propSep,    offsetof(SEP, xaPgn) },       // ipropPgnX
		{ actnWord,   propSep,    offsetof(SEP, yaPgn) },       // ipropPgnY
		{ actnWord,   propDop,    offsetof(DOP, xaPage) },      // ipropXaPage
		{ actnWord,   propDop,    offsetof(DOP, yaPage) },      // ipropYaPage
		{ actnWord,   propDop,    offsetof(DOP, xaLeft) },      // ipropXaLeft
		{ actnWord,   propDop,    offsetof(DOP, xaRight) },     // ipropXaRight
		{ actnWord,   propDop,    offsetof(DOP, yaTop) },       // ipropYaTop
		{ actnWord,   propDop,    offsetof(DOP, yaBottom) },    // ipropYaBottom
		{ actnWord,   propDop,    offsetof(DOP, pgnStart) },    // ipropPgnStart
		{ actnByte,   propSep,    offsetof(SEP, sbk) },         // ipropSbk
		{ actnByte,   propSep,    offsetof(SEP, pgnFormat) },   // ipropPgnFormat
		{ actnByte,   propDop,    offsetof(DOP, fFacingp) },    // ipropFacingp
		{ actnByte,   propDop,    offsetof(DOP, fLandscape) },  // ipropLandscape
		{ actnByte,   propPap,    offsetof(PAP, just) },        // ipropJust
		{ actnSpec,   propPap,    0 },                          // ipropPard
		{ actnSpec,   propChp,    0 },                          // ipropPlain
		{ actnSpec,   propSep,    0 }                           // ipropSectd
	};

	// Keyword texts. SYM::szKeyword is a plain char* that is dereferenced long
	// after this constructor has returned, so the storage must have static
	// duration; automatic arrays here would leave every table entry dangling.
	static char f_[] = "f";
	static char b_[] = "b";
	static char i_[] = "i";
	static char cf_[] = "cf";
	static char u_[] = "u";
	static char li_[] = "li";
	static char ri_[] = "ri";
	static char fi_[] = "fi";
	static char cols_[] = "cols";
	static char sbknone_[] = "sbknone";
	static char sbkcol_[] = "sbkcol";
	static char sbkeven_[] = "sbkeven";
	static char sbkodd_[] = "sbkodd";
	static char sbkpage_[] = "sbkpage";
	static char pgnx_[] = "pgnx";
	static char pgny_[] = "pgny";
	static char pgndec_[] = "pgndec";
	static char pgnucrm_[] = "pgnucrm";
	static char pgnlcrm_[] = "pgnlcrm";
	static char pgnucltr_[] = "pgnucltr";
	static char pgnlcltr_[] = "pgnlcltr";
	static char qc_[] = "qc";
	static char ql_[] = "ql";
	static char qr_[] = "qr";
	static char qj_[] = "qj";
	static char paperw_[] = "paperw";
	static char paperh_[] = "paperh";
	static char margl_[] = "margl";
	static char margr_[] = "margr";
	static char margt_[] = "margt";
	static char margb_[] = "margb";
	static char pgnstart_[] = "pgnstart";
	static char facingp_[] = "facingp";
	static char landscape_[] = "landscape";
	static char par_[] = "par";
	// Backslash followed by a raw LF / CR. These must be real one-character
	// strings: "\0x0a" would be a NUL followed by the text "x0a", i.e. an
	// empty C string that can never match the control symbol.
	static char ao_[] = "\x0a";
	static char do_[] = "\x0d";
	static char tab_[] = "tab";
	static char ldblquote_[] = "ldblquote";
	static char rdblquote_[] = "rdblquote";
	static char bin_[] = "bin";
	static char x_[] = "*";
	static char xx_[] = "'";
	static char author_[] = "author";
	static char buptim_[] = "buptim";
	static char colortbl_[] = "colortbl";
	static char comment_[] = "comment";
	static char creatim_[] = "creatim";
	static char doccomm_[] = "doccomm";
	static char fonttbl_[] = "fonttbl";
	static char footer_[] = "footer";
	static char footerf_[] = "footerf";
	static char footerl_[] = "footerl";
	static char footerr_[] = "footerr";
	static char footnote_[] = "footnote";
	static char ftncn_[] = "ftncn";
	static char ftnsep_[] = "ftnsep";
	static char ftnsepc_[] = "ftnsepc";
	static char header_[] = "header";
	static char headerf_[] = "headerf";
	static char headerl_[] = "headerl";
	static char headerr_[] = "headerr";
	static char info_[] = "info";
	static char keywords_[] = "keywords";
	static char operator_[] = "operator";
	static char pict_[] = "pict";
	static char printim_[] = "printim";
	static char private1_[] = "private1";
	static char revtim_[] = "revtim";
	static char rxe_[] = "rxe";
	static char stylesheet_[] = "stylesheet";
	static char subject_[] = "subject";
	static char tc_[] = "tc";
	static char title_[] = "title";
	static char txe_[] = "txe";
	static char xe_[] = "xe";
	static char ss_[] = "{";
	static char ee_[] = "}";
	static char ii_[] = "\\";

	// Keyword descriptions
	// Every kwdDest entry below maps to idestSkip, i.e. the destination is skipped.
	rgsymRtf = {
		//keyword     dflt    fPassDflt   kwd         idx
		{ f_,        0,      fFalse,     kwdProp,    ipropFont },
		{ b_,        1,      fFalse,     kwdProp,    ipropBold },
		{ i_,        1,      fFalse,     kwdProp,    ipropItalic },
		{ cf_,       1,      fFalse,     kwdProp,    ipropColorFont },
		{ u_,        1,      fFalse,     kwdProp,    ipropUnderline },
		{ li_,       0,      fFalse,     kwdProp,    ipropLeftInd },
		{ ri_,       0,      fFalse,     kwdProp,    ipropRightInd },
		{ fi_,       0,      fFalse,     kwdProp,    ipropFirstInd },
		{ cols_,     1,      fFalse,     kwdProp,    ipropCols },
		{ sbknone_,  sbkNon, fTrue,      kwdProp,    ipropSbk },
		{ sbkcol_,   sbkCol, fTrue,      kwdProp,    ipropSbk },
		{ sbkeven_,  sbkEvn, fTrue,      kwdProp,    ipropSbk },
		{ sbkodd_,   sbkOdd, fTrue,      kwdProp,    ipropSbk },
		{ sbkpage_,  sbkPg,  fTrue,      kwdProp,    ipropSbk },
		{ pgnx_,     0,      fFalse,     kwdProp,    ipropPgnX },
		{ pgny_,     0,      fFalse,     kwdProp,    ipropPgnY },
		{ pgndec_,   pgDec,  fTrue,      kwdProp,    ipropPgnFormat },
		{ pgnucrm_,  pgURom, fTrue,      kwdProp,    ipropPgnFormat },
		{ pgnlcrm_,  pgLRom, fTrue,      kwdProp,    ipropPgnFormat },
		{ pgnucltr_, pgULtr, fTrue,      kwdProp,    ipropPgnFormat },
		{ pgnlcltr_, pgLLtr, fTrue,      kwdProp,    ipropPgnFormat },
		{ qc_,       justC,  fTrue,      kwdProp,    ipropJust },
		{ ql_,       justL,  fTrue,      kwdProp,    ipropJust },
		{ qr_,       justR,  fTrue,      kwdProp,    ipropJust },
		{ qj_,       justF,  fTrue,      kwdProp,    ipropJust },
		{ paperw_,   12240,  fFalse,     kwdProp,    ipropXaPage },
		{ paperh_,   15480,  fFalse,     kwdProp,    ipropYaPage },
		{ margl_,    1800,   fFalse,     kwdProp,    ipropXaLeft },
		{ margr_,    1800,   fFalse,     kwdProp,    ipropXaRight },
		{ margt_,    1440,   fFalse,     kwdProp,    ipropYaTop },
		{ margb_,    1440,   fFalse,     kwdProp,    ipropYaBottom },
		{ pgnstart_, 1,      fTrue,      kwdProp,    ipropPgnStart },
		{ facingp_,  1,      fTrue,      kwdProp,    ipropFacingp },
		{ landscape_,1,      fTrue,      kwdProp,    ipropLandscape },
		// Entries above update a property.

		{ par_,      0,      fFalse,     kwdChar,    0x0a },
		{ ao_,       0,      fFalse,     kwdChar,    0x0a },
		{ do_,       0,      fFalse,     kwdChar,    0x0a },
		{ tab_,      0,      fFalse,     kwdChar,    0x09 },
		{ ldblquote_, 0,     fFalse,     kwdChar,    char('"') },
		{ rdblquote_, 0,     fFalse,     kwdChar,    char('"') },
		// Entries above emit a single character.

		{ bin_,      0,      fFalse,     kwdSpec,    ipfnBin },
		{ x_,        0,      fFalse,     kwdSpec,    ipfnSkipDest },
		{ xx_,       0,      fFalse,     kwdSpec,    ipfnHex },


		{ author_,   0,      fFalse,     kwdDest,    idestSkip },
		{ buptim_,   0,      fFalse,     kwdDest,    idestSkip },
		{ colortbl_, 0,      fFalse,     kwdDest,    idestSkip },
		{ comment_,  0,      fFalse,     kwdDest,    idestSkip },
		{ creatim_,  0,      fFalse,     kwdDest,    idestSkip },
		{ doccomm_,  0,      fFalse,     kwdDest,    idestSkip },

		{ fonttbl_,  0,      fFalse,     kwdDest,    idestSkip },

		{ footer_,   0,      fFalse,     kwdDest,    idestSkip },
		{ footerf_,  0,      fFalse,     kwdDest,    idestSkip },
		{ footerl_,  0,      fFalse,     kwdDest,    idestSkip },
		{ footerr_,  0,      fFalse,     kwdDest,    idestSkip },
		{ footnote_, 0,      fFalse,     kwdDest,    idestSkip },
		{ ftncn_,    0,      fFalse,     kwdDest,    idestSkip },
		{ ftnsep_,   0,      fFalse,     kwdDest,    idestSkip },
		{ ftnsepc_,  0,      fFalse,     kwdDest,    idestSkip },
		{ header_,   0,      fFalse,     kwdDest,    idestSkip },
		{ headerf_,  0,      fFalse,     kwdDest,    idestSkip },
		{ headerl_,  0,      fFalse,     kwdDest,    idestSkip },
		{ headerr_,  0,      fFalse,     kwdDest,    idestSkip },
		{ info_,     0,      fFalse,     kwdDest,    idestSkip },
		{ keywords_, 0,      fFalse,     kwdDest,    idestSkip },
		{ operator_, 0,      fFalse,     kwdDest,    idestSkip },
		{ pict_,     0,      fFalse,     kwdDest,    idestSkip },
		{ printim_,  0,      fFalse,     kwdDest,    idestSkip },
		{ private1_, 0,      fFalse,     kwdDest,    idestSkip },
		{ revtim_,   0,      fFalse,     kwdDest,    idestSkip },
		{ rxe_,      0,      fFalse,     kwdDest,    idestSkip },
		{ stylesheet_,   0,  fFalse,     kwdDest,    idestSkip },
		{ subject_,  0,      fFalse,     kwdDest,    idestSkip },
		{ tc_,       0,      fFalse,     kwdDest,    idestSkip },
		{ title_,    0,      fFalse,     kwdDest,    idestSkip },
		{ txe_,      0,      fFalse,     kwdDest,    idestSkip },
		{ xe_,       0,      fFalse,     kwdDest,    idestSkip },
		// Entries above are skipped destinations.

		{ ss_,       0,      fFalse,     kwdChar,    char('{') },
		{ ee_,       0,      fFalse,     kwdChar,    char('}') },
		{ ii_,       0,      fFalse,     kwdChar,    char('\\') }
		// Entries above are escaped literal characters.
	};
	isymMax = static_cast<int>(rgsymRtf.size());
}

// Releases any SAVE records still on the group stack. Records are pushed with
// malloc in ecPushRtfState and normally freed by ecPopRtfState, but a parse
// that stops early (error or unmatched '{') leaves them behind.
Rtf::~Rtf() {
	while (psave) {
		SAVE *next = psave->pNext;
		free(psave);
		psave = next;
	}
}

// Returns the next input byte as a value in 0..255 and advances the read
// position, or RTF_Eof once the end of the input has been reached.
int Rtf::getRtfContent() {

	if (mCurrentPos >= mSize)
		return RTF_Eof;

	// Go through unsigned char: with a signed char type, byte 0xFF would
	// otherwise be returned as -1 and be mistaken for RTF_Eof, and every byte
	// >= 0x80 would come back negative.
	return static_cast<unsigned char>(mRtfContent[mCurrentPos++]);
}

// Pushes the most recently read byte back by stepping the read position one
// place backwards. Does nothing when the position is already at the start,
// because mCurrentPos is unsigned and would otherwise wrap around.
void Rtf::ugetRtfContent() {
	if (mCurrentPos > 0)
		mCurrentPos--;
}

// Parses rtfContent from its first byte and appends the extracted text to cs.
// Returns ecInvalidRtf when the input is shorter than 10 bytes, otherwise the
// result of parseRtf().
// The parser state is reset on every call so one Rtf object can be used for
// several documents. cs is deliberately left alone: text from consecutive
// calls accumulates, as it did before.
Rtf::EcErrorCode Rtf::convertRTF(const std::string &rtfContent) {
	
	mRtfContent = rtfContent;
	mSize = mRtfContent.size();
	mCurrentPos = 0;

	// Drop group records left over from an earlier parse that stopped early.
	while (psave) {
		SAVE *next = psave->pNext;
		free(psave);
		psave = next;
	}

	cGroup = 0;
	rds = rdsNorm;
	ris = risNorm;
	fSkipDestIfUnk = false;
	cbBin = 0;
	lParam = 0;
	chp = CHP();
	pap = PAP();
	sep = SEP();
	dop = DOP();

	if (mSize < 10)
		return ecInvalidRtf;

	return parseRtf();
}

// Main reader loop: pulls bytes from the input until RTF_Eof and dispatches
// on them.
//   '{'   pushes the current state, '}' pops it;
//   '\\'  starts a control word / control symbol;
//   CR/LF are ignored;
//   anything else is text (risNorm) or one hex digit of a \'hh escape (risHex).
// While ris == risBin every byte is passed straight to ecParseChar.
// Returns the first error reported by a helper, ecStackUnderflow /
// ecUnmatchedBrace when braces do not balance, otherwise ecOK.
Rtf::EcErrorCode Rtf::parseRtf() {

	int ch;
	Rtf::EcErrorCode ec;
	int cNibble = 2;  // hex digits still expected for the current \'hh escape
	int b = 0;        // byte value assembled from the hex digits

	while ((ch = getRtfContent()) != RTF_Eof)
	{
		// More '}' than '{' seen so far.
		if (cGroup < 0)
			return ecStackUnderflow;

		if (ris == risBin) {
			// Binary data is passed through untouched.
			if ((ec = ecParseChar(ch)) != ecOK)
				return ec;
			continue;
		}

		switch (ch) {
		case '{':
			if ((ec = ecPushRtfState()) != ecOK)
				return ec;
			break;
		case '}':
			if ((ec = ecPopRtfState()) != ecOK)
				return ec;
			break;
		case '\\':
			if ((ec = ecParseRtfKeyword()) != ecOK)
				return ec;
			break;
		case 0x0d:
		case 0x0a:
			break;
		default:
			if (ris == risNorm)
			{
				// Ordinary text character.
				if ((ec = ecParseChar(ch)) != ecOK)
					return ec;
				break;
			}

			// parsing hex data
			if (ris != risHex)
				return ecAssertion;

			{
				// Plain range checks instead of isdigit/islower: they are
				// locale-independent and well defined for any int value.
				int digit;
				if (ch >= '0' && ch <= '9')
					digit = ch - '0';
				else if (ch >= 'a' && ch <= 'f')
					digit = ch - 'a' + 10;   // 'a'..'f' are 10..15, not 0..5
				else if (ch >= 'A' && ch <= 'F')
					digit = ch - 'A' + 10;
				else
					return ecInvalidHex;
				b = (b << 4) + digit;
			}

			// Two hex digits form one byte; after the second one emit the
			// byte and return to normal text mode.
			cNibble--;
			if (!cNibble)
			{
				if ((ec = ecParseChar(b)) != ecOK)
					return ec;
				cNibble = 2;
				b = 0;
				ris = risNorm;
			}
			break;
		}
	}

	if (cGroup < 0)
		return ecStackUnderflow;
	if (cGroup > 0)
		return ecUnmatchedBrace;
	return ecOK;
}

// Called for '{': snapshots the current properties and parser states into a
// freshly allocated SAVE record, links it on top of the save stack, resets
// ris to risNorm for the new group and bumps the group depth.
// Returns ecStackOverflow when the allocation fails, otherwise ecOK.
Rtf::EcErrorCode Rtf::ecPushRtfState() {
	SAVE *frame = static_cast<SAVE *>(malloc(sizeof(SAVE)));
	if (frame == nullptr)
		return ecStackOverflow;

	// Snapshot of everything the group is allowed to change.
	frame->chp = chp;
	frame->pap = pap;
	frame->sep = sep;
	frame->dop = dop;
	frame->rds = rds;
	frame->ris = ris;

	// Link the record in as the new top of the stack.
	frame->pNext = psave;
	psave = frame;

	// A new group always starts out reading normal text.
	ris = risNorm;
	++cGroup;
	return ecOK;
}

// Called for '}': restores the properties and parser states saved by the
// matching ecPushRtfState, unlinks and frees the top SAVE record and lowers
// the group depth.
// Returns ecStackUnderflow when the stack is empty, or the error reported by
// ecEndGroupAction; otherwise ecOK.
Rtf::EcErrorCode Rtf::ecPopRtfState() {
	SAVE *top = psave;
	if (top == nullptr)
		return ecStackUnderflow;

	// The destination changed inside this group: give it a chance to clean up.
	if (rds != top->rds)
	{
		const EcErrorCode ec = ecEndGroupAction(rds);
		if (ec != ecOK)
			return ec;
	}

	// Back to the enclosing group's state.
	chp = top->chp;
	pap = top->pap;
	sep = top->sep;
	dop = top->dop;
	rds = top->rds;
	ris = top->ris;

	psave = top->pNext;
	--cGroup;
	free(top);
	return ecOK;
}

// Hook invoked when a group that switched destination is closed.
// Currently there is nothing to clean up, so the argument is ignored and
// ecOK is always returned.
Rtf::EcErrorCode Rtf::ecEndGroupAction(RDS /*rds*/)
{
	return ecOK;
}

// Reads the control word or control symbol that follows a backslash, together
// with its optional (possibly negative) numeric parameter, and hands it to
// ecTranslateKeyword.
//   - A non-letter right after the backslash is a one-character control
//     symbol without a parameter.
//   - Otherwise letters form the keyword, an optional '-' and digits form the
//     parameter, and a single following space is consumed as the delimiter.
// Returns ecEndOfFile when the input ends right after the backslash or right
// after a '-', otherwise the result of ecTranslateKeyword.
Rtf::EcErrorCode Rtf::ecParseRtfKeyword()
{
	// Explicit ASCII ranges: locale-independent and well defined for every
	// int value, including RTF_Eof.
	const auto isLetter = [](int c) {
		return (c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z');
	};
	const auto isDigit = [](int c) { return c >= '0' && c <= '9'; };

	bool fParam = false;  // keyword carried a numeric parameter
	bool fNeg = false;    // parameter was preceded by '-'
	int  param = 0;
	char szKeyword[30];
	char szParameter[20];

	szKeyword[0] = '\0';
	szParameter[0] = '\0';

	// Do not let a keyword without a parameter inherit the previous one's.
	lParam = 0;

	int ch = getRtfContent();
	if (ch == RTF_Eof)
		return ecEndOfFile;

	// a control symbol; no delimiter.
	if (!isLetter(ch))
	{
		szKeyword[0] = (char)ch;
		szKeyword[1] = '\0';
		return ecTranslateKeyword(szKeyword, 0, fParam);
	}

	// Collect the letters of the keyword. Letters that do not fit are still
	// consumed but dropped, so an over-long keyword cannot overrun the buffer
	// (it then simply fails to match any table entry).
	size_t len = 0;
	for (; isLetter(ch); ch = getRtfContent())
	{
		if (len + 1 < sizeof(szKeyword))
			szKeyword[len++] = (char)ch;
	}
	szKeyword[len] = '\0';

	// A '-' announces a negative parameter.
	if (ch == '-')
	{
		fNeg = true;
		if ((ch = getRtfContent()) == RTF_Eof)
			return ecEndOfFile;
	}

	if (isDigit(ch))
	{
		// a digit after the control means we have a parameter
		fParam = true;

		// Collect the digits, bounded the same way as the keyword.
		len = 0;
		for (; isDigit(ch); ch = getRtfContent())
		{
			if (len + 1 < sizeof(szParameter))
				szParameter[len++] = (char)ch;
		}
		szParameter[len] = '\0';

		// strtol saturates at LONG_MAX instead of overflowing.
		const long parsed = std::strtol(szParameter, nullptr, 10);
		lParam = parsed;
		param = parsed > INT_MAX ? INT_MAX : static_cast<int>(parsed);
		if (fNeg)
			param = -param;
	}

	// A space is the delimiter and is swallowed; any other byte belongs to
	// what follows and is pushed back. Nothing was read at end of input, so
	// there is nothing to push back in that case.
	if (ch != ' ' && ch != RTF_Eof)
		ugetRtfContent();

	return ecTranslateKeyword(szKeyword, param, fParam);
}

// Looks szKeyword up in rgsymRtf and performs the action recorded for it.
//   szKeyword  keyword text (without the backslash)
//   param      numeric parameter, meaningful only when fParam is true
//   fParam     whether the keyword carried a parameter
// An unknown keyword is ignored; if the skip-if-unknown flag was set, the
// current destination is switched to rdsSkip. The flag is cleared either way.
// Returns the result of the dispatched handler, ecOK for unknown keywords, or
// ecBadTable when the table entry has an unrecognised kind.
Rtf::EcErrorCode Rtf::ecTranslateKeyword(char *szKeyword, int param, bool fParam)
{
	// Linear search through the keyword table.
	int pos = 0;
	while (pos < isymMax && strcmp(szKeyword, rgsymRtf[pos].szKeyword) != 0)
		++pos;

	// The flag only applies to the keyword immediately following it.
	const bool skipIfUnknown = fSkipDestIfUnk;
	fSkipDestIfUnk = fFalse;

	if (pos == isymMax)
	{
		// control word not found: skip the destination if asked to,
		// else just discard it
		if (skipIfUnknown)
			rds = rdsSkip;
		return ecOK;
	}

	// found it!  use kwd and idx to determine what to do with it.
	const SYM &entry = rgsymRtf[pos];

	if (entry.kwd == kwdProp)
	{
		// The table default wins when the entry says so or when the keyword
		// came without a parameter.
		const int value = (entry.fPassDflt || !fParam) ? entry.dflt : param;
		return ecApplyPropChange(static_cast<IPROP>(entry.idx), value);
	}
	if (entry.kwd == kwdChar)
		return ecParseChar(entry.idx);
	if (entry.kwd == kwdDest)
		return ecChangeDest(static_cast<IDEST>(entry.idx));
	if (entry.kwd == kwdSpec)
		return ecParseSpecialKeyword(static_cast<IPFN>(entry.idx));

	return ecBadTable;
}

// Stores val into the property identified by iprop, using the rgprop entry to
// find the owning structure, the member offset and the width of the write.
// Does nothing while the current destination is being skipped.
// Returns ecBadTable for an inconsistent table entry, the result of
// ecParseSpecialProperty for actnSpec entries, otherwise ecOK.
Rtf::EcErrorCode Rtf::ecApplyPropChange(IPROP iprop, int val)
{
	// If we're skipping text, don't do anything.
	if (rds == rdsSkip)
		return ecOK;

	const PROP &desc = rgprop[iprop];

	// Base address of the structure that holds this property.
	char *base = nullptr;
	if (desc.prop == propDop)
		base = reinterpret_cast<char *>(&dop);
	else if (desc.prop == propSep)
		base = reinterpret_cast<char *>(&sep);
	else if (desc.prop == propPap)
		base = reinterpret_cast<char *>(&pap);
	else if (desc.prop == propChp)
		base = reinterpret_cast<char *>(&chp);
	else if (desc.actn != actnSpec)
		return ecBadTable;   // only special properties may lack a structure

	// Write the value with the width given by the table.
	if (desc.actn == actnByte)
	{
		base[desc.offset] = static_cast<unsigned char>(val);
		return ecOK;
	}
	if (desc.actn == actnWord)
	{
		*reinterpret_cast<int *>(base + desc.offset) = val;
		return ecOK;
	}
	if (desc.actn == actnSpec)
		return ecParseSpecialProperty(iprop, val);

	return ecBadTable;
}

// Routes one character to the current destination.
// In binary mode the remaining byte count is decremented first, and the
// parser drops back to normal mode once the count reaches zero or below.
// The character is printed for rdsNorm and discarded for every other
// destination state. Returns the result of ecPrintChar, or ecOK.
Rtf::EcErrorCode Rtf::ecParseChar(int ch) {
	if (ris == risBin)
	{
		--cbBin;
		if (cbBin <= 0)
			ris = risNorm;
	}

	// Output a character. Properties are valid at this point.
	if (rds == rdsNorm)
		return ecPrintChar(ch);

	// rdsSkip (or any other destination): toss this character.
	return ecOK;
}

// Switches to the destination named by idest. Every destination is currently
// treated the same way: the destination state becomes rdsSkip so its content
// is discarded. Per-destination handling would be added here.
// Always returns ecOK.
Rtf::EcErrorCode Rtf::ecChangeDest(IDEST idest)
{
	// Already skipping: nothing to change. Otherwise start skipping,
	// whatever idest is (when in doubt, skip it...).
	if (rds != rdsSkip)
		rds = rdsSkip;

	return ecOK;
}

// Appends one extracted character to the output string cs.
// The current property state could be recorded alongside it here if the text
// were to be rendered. Always returns ecOK.
Rtf::EcErrorCode Rtf::ecPrintChar(int ch)
{
	cs.push_back(static_cast<char>(ch));
	return ecOK;
}

// Handles the properties that need code rather than a plain store:
//   ipropPard  zeroes the paragraph properties,
//   ipropPlain zeroes the character properties,
//   ipropSectd zeroes the section properties.
// val is not used. Returns ecBadTable for any other property, otherwise ecOK.
Rtf::EcErrorCode Rtf::ecParseSpecialProperty(IPROP iprop, int val)
{
	if (iprop == ipropPard)
	{
		memset(&pap, 0, sizeof(pap));
		return ecOK;
	}
	if (iprop == ipropPlain)
	{
		memset(&chp, 0, sizeof(chp));
		return ecOK;
	}
	if (iprop == ipropSectd)
	{
		memset(&sep, 0, sizeof(sep));
		return ecOK;
	}
	return ecBadTable;
}

// Evaluates an RTF control that needs special processing:
//   ipfnBin      enter binary mode for the next lParam bytes,
//   ipfnSkipDest remember that the next keyword, if unknown, starts a
//                destination that must be skipped,
//   ipfnHex      the following two characters are hex digits of one byte.
// While a destination is being skipped only ipfnBin is honoured, so that its
// payload is still consumed as raw bytes.
// Returns ecBadTable for an unrecognised ipfn, otherwise ecOK.
Rtf::EcErrorCode Rtf::ecParseSpecialKeyword(IPFN ipfn)
{
	// if we're skipping, and it's not the \bin keyword, ignore it.
	if (rds == rdsSkip && ipfn != ipfnBin)
		return ecOK;

	switch (ipfn)
	{
	case ipfnBin:
		// Only switch to binary mode when there is payload to read. With a
		// count of zero (or less) the mode would otherwise stay active for
		// one more byte and swallow a character that is not binary data.
		if (lParam > 0)
		{
			ris = risBin;
			cbBin = lParam;
		}
		break;
	case ipfnSkipDest:
		fSkipDestIfUnk = fTrue;
		break;
	case ipfnHex:
		ris = risHex;
		break;
	default:
		return ecBadTable;
	}
	return ecOK;
}

tianyapai

关注

0
点赞
踩
1

收藏

觉得还不错? 一键收藏
0
评论
《RTFV1.7规范》中Demo解析

对《RTF V1.7规范》中的Demo进行了小部分修改，使其可以直接编译测试，以下是代码及相应的注解：#include "rtf.h" #include <iostream> int main(){ std::string strRtf = "{\\rtf1\\ansi{\\fonttbl{\\f0\\fswiss\\fprq2\\fcharset0 Arial;}" ...
复制链接

扫一扫