自己用 expat 做的一个 RSS 解析器,大体成功,记录一下心得,以后有时间再继续完善。
#include <stdarg.h>
#include <stdio.h>
#include <string.h>

#include "./expat_mine/expat.h"
/* Parser state shared by the expat callbacks in this file. */
int elementShowFlag = 0;            /* non-zero while character data should be captured */
int elementTitleNum = 0;            /* not referenced by the code visible in this file */
char rssMainTitle[256] = "";        /* feed title (CharacterDataStat == 1) */
char rssSubTitle[30][256] = { "" }; /* item titles (CharacterDataStat == 2) */
char rssContent[30][5120] = { "" }; /* description/summary text (CharacterDataStat == 3) */
char RssStoreBuf[256 * 1024];       /* raw bytes of the XML file */
int CharacterDataStat = 0;          /* 1 = main title, 2 = sub titles, 3 = cdata */
int rssSubTitleNum = 0;             /* next free slot in rssSubTitle */
int rssContentNum = 0;              /* slot in rssContent currently being filled */
size_t rssLen;                      /* number of valid bytes in RssStoreBuf */
void SetElementShowFlag( int value)
... {
elementShowFlag = value;
}
int GetElementShowFlag()
... {
return elementShowFlag;
}
int GetCharacterDataStat() /**/ /*1main title 2sub titles 3cdata*/
... {
return CharacterDataStat;
}
/*
 * Remove, in place, every occurrence of each given substring from sor.
 *
 * num: number of char* patterns that follow sor in the argument list.
 * sor: NUL-terminated, writable string to edit.
 * ...: num NUL-terminated patterns, processed in the order given.
 */
void rssDelSubString(int num, char *sor, ...)
{
    va_list arg_ptr;
    int tempnum;

    va_start(arg_ptr, sor);
    for (tempnum = num; tempnum > 0; tempnum--) {
        const char *arg = va_arg(arg_ptr, char *);
        size_t arglen = strlen(arg);
        char *q;

        /* An empty pattern matches everywhere and would never terminate. */
        if (arglen == 0)
            continue;

        while ((q = strstr(sor, arg)) != NULL) {
            /* Source and destination overlap, so strcpy is not allowed here. */
            memmove(q, q + arglen, strlen(q + arglen) + 1);
        }
    }
    va_end(arg_ptr);
}
/*
 * Remove, in place, every span of sor that starts with one of the given
 * prefixes (e.g. "<A ") and runs through the next '>' inclusive.
 *
 * num: number of char* prefixes that follow sor in the argument list.
 * sor: NUL-terminated, writable string to edit.
 * ...: num NUL-terminated prefixes, processed in the order given.
 *
 * A prefix that is never followed by '>' is left untouched.
 */
void rssDelElementString(int num, char *sor, ...)
{
    va_list arg_ptr;
    int tempnum;

    va_start(arg_ptr, sor);
    for (tempnum = num; tempnum > 0; tempnum--) {
        const char *arg = va_arg(arg_ptr, char *);
        char *q;

        while ((q = strstr(sor, arg)) != NULL) {
            char *closing = strchr(q, '>');

            /* Unterminated tag: stop instead of scanning past the NUL. */
            if (closing == NULL)
                break;

            /* Regions overlap, so memmove rather than strcpy. */
            memmove(q, closing + 1, strlen(closing + 1) + 1);
        }
    }
    va_end(arg_ptr);
}
// tag的开头处理函数
void startElement( void * userData, const char * name, const char ** atts)
... {
int i;
int *depthPtr = userData;
if((0 == strcmp(name,"title"))||(0 == strcmp(name,"description"))||(0 == strcmp(name,"summary")))
...{
if(0==strcmp(name,"title") )
...{
CharacterDataStat = (0==GetCharacterDataStat()) ? 1:2;
}
else if((0 == strcmp(name,"description"))||(0 == strcmp(name,"summary")))
...{
CharacterDataStat = 3;
}
for (i = 0; i < *depthPtr; i++)
putchar(' ');
printf ("XML: <%s", name);
i = 0;
while (atts[i] && *atts[i] && atts[i][0] != 0)
...{
if (i % 2)
...{
printf ("=");
printf (""%s"", atts[i]);
}
else
...{
printf (" ");
printf ("%s", atts[i]);
}
i++;
}
printf ("> ");
SetElementShowFlag(1);
}
*depthPtr += 1;
}
// tag的结束处理函数
void endElement( void * userData, const char * name)
... {
int *depthPtr = userData;
if((0 == strcmp(name,"description"))||(0 == strcmp(name,"summary")))
...{
rssDelElementString(2,rssContent[rssContentNum],"<A ","<IMG ");
rssContentNum++;
}
*depthPtr -= 1;
SetElementShowFlag(0);
}
// 字符处理函数
void CharacterDataHandler( void * userData, const XML_Char * s, int len)
... {
char strtmp[10240] = "o";
int i;
int tempstat;
if(GetElementShowFlag())
...{
for(i=0;i<len;i++)
...{
if(s[i] == ' ')
continue;
strtmp[i]=s[i];
}
strtmp[len]='o';
tempstat = GetCharacterDataStat();
// printf("tempstat:%d",tempstat);
switch(tempstat)
...{
case 1:
...{
// printf("main title*%s* ",strtmp);
strcpy(rssMainTitle,strtmp);
break;
}
case 2:
...{
// printf("Sub title*%s* ",strtmp);
strcpy(rssSubTitle[rssSubTitleNum],strtmp);
rssSubTitleNum++;
break;
}
case 3:
...{
if(0!=strlen(strtmp))
...{
// printf("Content~%s~ ",strtmp);
rssDelSubString(8,strtmp," ","<br/>","</FONT>","<P>","</P>","<DIV>","</DIV>","</A>");/**//*del 7 substr*/
// printf("Content_%s_ ",strtmp);
if(0!=strlen(strtmp))
...{
strcat(rssContent[rssContentNum],strtmp);
}
}
break;
}
}
//strcat(Cdatastring,strtmp);
//printf("%s ",strtmp);
}
}
/*
 * Default handler: prints the data it receives, with ' ' characters dropped,
 * followed by a space.
 *
 * userData: unused.
 * s:        data; NOT NUL-terminated.
 * len:      number of characters in s. Input longer than the local buffer is
 *           truncated.
 *
 * NOTE(review): the ' ' literal may originally have been an escape such as
 * '\n' that was lost when the listing was published - confirm.
 */
void DefaultHandler(void *userData, const XML_Char *s, int len)
{
    char strtmp[1024];
    int i;
    int n = 0;

    (void)userData;

    /* Compact while copying so that skipped characters leave no gaps. */
    for (i = 0; i < len && n < (int)sizeof strtmp - 1; i++) {
        if (s[i] == ' ')
            continue;
        strtmp[n++] = s[i];
    }
    strtmp[n] = '\0';
    printf("%s ", strtmp);
}
int main()
... {
openDoc();
rsstest(RssStoreBuf);
}
int rsstest( char * buf)
... {
int done = 0;
int depth = 0;
int i = 0;
XML_Parser parser = XML_ParserCreate(NULL);
XML_SetUserData(parser, &depth);
XML_SetElementHandler(parser, startElement, endElement);
XML_SetCharacterDataHandler(parser,CharacterDataHandler);
// XML_SetDefaultHandlerExpand(parser,DefaultHandler);
if (XML_Parse(parser, buf, rssLen, done) == XML_STATUS_ERROR) return 1;
XML_ParserFree(parser);
/**//* printf("EXPACT Finished! ");
printf("^^^^^^^^^^^^^^^^^^ ");
printf("%s",Cdatastring);
printf("^^^^^^^^^^^^^^^^^^ ");
printf("%s ",rssMainTitle);
printf("^^^^^^^^^^^^^^^^^^ ");
for(i=0 ; i<rssSubTitleNum ;i++)
printf("%s ",rssSubTitle[i]);
*/ printf("^^^^^^^^^^^^^^^^^^ ");
for(i=0 ; i<rssContentNum ; i++)
printf("%s ",rssContent[i]);
return 0;
}
int openDoc()
... {
FILE* fp;
fp = fopen("./xml/bf1111.xml","rb");
if(!fp)
...{
printf("ERROR:XML NOT FOUND ");
return 1;
}
rssLen = fread(RssStoreBuf, 1, 256*1024, fp);
fclose(fp);
}
#include < string .h >
#include < stdarg.h >
#include " ./expat_mine/expat.h "
/*
 * Parser state shared by the expat callbacks in this file.
 * NOTE(review): these definitions repeat ones that appear earlier in the file;
 * a C translation unit may contain only one initialised definition of each.
 */
int elementShowFlag = 0;            /* non-zero while character data should be captured */
int elementTitleNum = 0;            /* not referenced by the code visible in this file */
char rssMainTitle[256] = "";        /* feed title (CharacterDataStat == 1) */
char rssSubTitle[30][256] = { "" }; /* item titles (CharacterDataStat == 2) */
char rssContent[30][5120] = { "" }; /* description/summary text (CharacterDataStat == 3) */
char RssStoreBuf[256 * 1024];       /* raw bytes of the XML file */
int CharacterDataStat = 0;          /* 1 = main title, 2 = sub titles, 3 = cdata */
int rssSubTitleNum = 0;             /* next free slot in rssSubTitle */
int rssContentNum = 0;              /* slot in rssContent currently being filled */
size_t rssLen;                      /* number of valid bytes in RssStoreBuf */
void SetElementShowFlag( int value)
... {
elementShowFlag = value;
}
int GetElementShowFlag()
... {
return elementShowFlag;
}
int GetCharacterDataStat() /**/ /*1main title 2sub titles 3cdata*/
... {
return CharacterDataStat;
}
/*
 * Remove, in place, every occurrence of each given substring from sor.
 *
 * num: number of char* patterns that follow sor in the argument list.
 * sor: NUL-terminated, writable string to edit.
 * ...: num NUL-terminated patterns, processed in the order given.
 *
 * NOTE(review): repeats a definition that appears earlier in this file.
 */
void rssDelSubString(int num, char *sor, ...)
{
    va_list arg_ptr;
    int tempnum;

    va_start(arg_ptr, sor);
    for (tempnum = num; tempnum > 0; tempnum--) {
        const char *arg = va_arg(arg_ptr, char *);
        size_t arglen = strlen(arg);
        char *q;

        /* An empty pattern matches everywhere and would never terminate. */
        if (arglen == 0)
            continue;

        while ((q = strstr(sor, arg)) != NULL) {
            /* Source and destination overlap, so strcpy is not allowed here. */
            memmove(q, q + arglen, strlen(q + arglen) + 1);
        }
    }
    va_end(arg_ptr);
}
/*
 * Remove, in place, every span of sor that starts with one of the given
 * prefixes (e.g. "<A ") and runs through the next '>' inclusive.
 *
 * num: number of char* prefixes that follow sor in the argument list.
 * sor: NUL-terminated, writable string to edit.
 * ...: num NUL-terminated prefixes, processed in the order given.
 *
 * A prefix that is never followed by '>' is left untouched.
 * NOTE(review): repeats a definition that appears earlier in this file.
 */
void rssDelElementString(int num, char *sor, ...)
{
    va_list arg_ptr;
    int tempnum;

    va_start(arg_ptr, sor);
    for (tempnum = num; tempnum > 0; tempnum--) {
        const char *arg = va_arg(arg_ptr, char *);
        char *q;

        while ((q = strstr(sor, arg)) != NULL) {
            char *closing = strchr(q, '>');

            /* Unterminated tag: stop instead of scanning past the NUL. */
            if (closing == NULL)
                break;

            /* Regions overlap, so memmove rather than strcpy. */
            memmove(q, closing + 1, strlen(closing + 1) + 1);
        }
    }
    va_end(arg_ptr);
}
// tag的开头处理函数
void startElement( void * userData, const char * name, const char ** atts)
... {
int i;
int *depthPtr = userData;
if((0 == strcmp(name,"title"))||(0 == strcmp(name,"description"))||(0 == strcmp(name,"summary")))
...{
if(0==strcmp(name,"title") )
...{
CharacterDataStat = (0==GetCharacterDataStat()) ? 1:2;
}
else if((0 == strcmp(name,"description"))||(0 == strcmp(name,"summary")))
...{
CharacterDataStat = 3;
}
for (i = 0; i < *depthPtr; i++)
putchar(' ');
printf ("XML: <%s", name);
i = 0;
while (atts[i] && *atts[i] && atts[i][0] != 0)
...{
if (i % 2)
...{
printf ("=");
printf (""%s"", atts[i]);
}
else
...{
printf (" ");
printf ("%s", atts[i]);
}
i++;
}
printf ("> ");
SetElementShowFlag(1);
}
*depthPtr += 1;
}
// tag的结束处理函数
void endElement( void * userData, const char * name)
... {
int *depthPtr = userData;
if((0 == strcmp(name,"description"))||(0 == strcmp(name,"summary")))
...{
rssDelElementString(2,rssContent[rssContentNum],"<A ","<IMG ");
rssContentNum++;
}
*depthPtr -= 1;
SetElementShowFlag(0);
}
// 字符处理函数
void CharacterDataHandler( void * userData, const XML_Char * s, int len)
... {
char strtmp[10240] = "o";
int i;
int tempstat;
if(GetElementShowFlag())
...{
for(i=0;i<len;i++)
...{
if(s[i] == ' ')
continue;
strtmp[i]=s[i];
}
strtmp[len]='o';
tempstat = GetCharacterDataStat();
// printf("tempstat:%d",tempstat);
switch(tempstat)
...{
case 1:
...{
// printf("main title*%s* ",strtmp);
strcpy(rssMainTitle,strtmp);
break;
}
case 2:
...{
// printf("Sub title*%s* ",strtmp);
strcpy(rssSubTitle[rssSubTitleNum],strtmp);
rssSubTitleNum++;
break;
}
case 3:
...{
if(0!=strlen(strtmp))
...{
// printf("Content~%s~ ",strtmp);
rssDelSubString(8,strtmp," ","<br/>","</FONT>","<P>","</P>","<DIV>","</DIV>","</A>");/**//*del 7 substr*/
// printf("Content_%s_ ",strtmp);
if(0!=strlen(strtmp))
...{
strcat(rssContent[rssContentNum],strtmp);
}
}
break;
}
}
//strcat(Cdatastring,strtmp);
//printf("%s ",strtmp);
}
}
/*
 * Default handler: prints the data it receives, with ' ' characters dropped,
 * followed by a space.
 *
 * userData: unused.
 * s:        data; NOT NUL-terminated.
 * len:      number of characters in s. Input longer than the local buffer is
 *           truncated.
 *
 * NOTE(review): the ' ' literal may originally have been an escape such as
 * '\n' that was lost when the listing was published - confirm.
 * NOTE(review): repeats a definition that appears earlier in this file.
 */
void DefaultHandler(void *userData, const XML_Char *s, int len)
{
    char strtmp[1024];
    int i;
    int n = 0;

    (void)userData;

    /* Compact while copying so that skipped characters leave no gaps. */
    for (i = 0; i < len && n < (int)sizeof strtmp - 1; i++) {
        if (s[i] == ' ')
            continue;
        strtmp[n++] = s[i];
    }
    strtmp[n] = '\0';
    printf("%s ", strtmp);
}
int main()
... {
openDoc();
rsstest(RssStoreBuf);
}
int rsstest( char * buf)
... {
int done = 0;
int depth = 0;
int i = 0;
XML_Parser parser = XML_ParserCreate(NULL);
XML_SetUserData(parser, &depth);
XML_SetElementHandler(parser, startElement, endElement);
XML_SetCharacterDataHandler(parser,CharacterDataHandler);
// XML_SetDefaultHandlerExpand(parser,DefaultHandler);
if (XML_Parse(parser, buf, rssLen, done) == XML_STATUS_ERROR) return 1;
XML_ParserFree(parser);
/**//* printf("EXPACT Finished! ");
printf("^^^^^^^^^^^^^^^^^^ ");
printf("%s",Cdatastring);
printf("^^^^^^^^^^^^^^^^^^ ");
printf("%s ",rssMainTitle);
printf("^^^^^^^^^^^^^^^^^^ ");
for(i=0 ; i<rssSubTitleNum ;i++)
printf("%s ",rssSubTitle[i]);
*/ printf("^^^^^^^^^^^^^^^^^^ ");
for(i=0 ; i<rssContentNum ; i++)
printf("%s ",rssContent[i]);
return 0;
}
int openDoc()
... {
FILE* fp;
fp = fopen("./xml/bf1111.xml","rb");
if(!fp)
...{
printf("ERROR:XML NOT FOUND ");
return 1;
}
rssLen = fread(RssStoreBuf, 1, 256*1024, fp);
fclose(fp);
}
FT,还是老规矩,代码里的 \0 全部被替换成 o 了~没有办法~