删除HTML标记

#ifndef REQUIRE_H
#define REQUIRE_H
#include <cstdio>
#include <cstdlib>
#include <fstream>


/// Aborts the program when a runtime requirement is not met.
///
/// @param requirement  Condition that must hold for execution to continue.
/// @param msg          Text written to stderr, followed by a newline, when
///                     the requirement fails.
///
/// If `requirement` is false the process terminates via
/// `exit(EXIT_FAILURE)`; otherwise the call has no effect.
inline void require(bool requirement,const char* msg="Requirement failed")
{
    // Nothing to report when the condition holds.
    if(requirement)
        return;

    std::fputs(msg,stderr);
    std::fputs("\n",stderr);
    std::exit(EXIT_FAILURE);
}


/// Verifies that the program received exactly `args` command-line arguments.
///
/// @param argc  Argument count to check; it must equal `args + 1`.
/// @param args  Required number of arguments.
/// @param msg   printf-style format string written to stderr on failure;
///              `args` is supplied as its single format argument.
///
/// On a mismatch the formatted message and a newline are written to stderr
/// and the process terminates via `exit(EXIT_FAILURE)`.
inline void requireArgs(int argc,int args,const char* msg="Must use %d arguments")
{
    const bool countMatches=(argc==args+1);
    if(countMatches)
        return;

    std::fprintf(stderr,msg,args);
    std::fputs("\n",stderr);
    std::exit(EXIT_FAILURE);
}


/// Verifies that the program received at least `minArgs` command-line
/// arguments.
///
/// @param argc     Argument count to check; it must be at least
///                 `minArgs + 1`.
/// @param minArgs  Minimum number of arguments.
/// @param msg      printf-style format string written to stderr on failure;
///                 `minArgs` is supplied as its single format argument.
///
/// When too few arguments are present the formatted message and a newline
/// are written to stderr and the process terminates via
/// `exit(EXIT_FAILURE)`.
inline void requireMinArgs(int argc,int minArgs,const char* msg="Must use at least %d arguments")
{
    if(argc>=minArgs+1)
        return;

    std::fprintf(stderr,msg,minArgs);
    std::fputs("\n",stderr);
    std::exit(EXIT_FAILURE);
}
//Three overloaded assure functions that check whether an ifstream, ofstream or fstream is in a usable state (e.g. the file was opened successfully)
/// Ensures that an input file stream is usable.
///
/// @param in        Stream to test; it is considered unusable when `!in`
///                  is true.
/// @param filename  Name included in the error message (may be empty).
///
/// When the stream is unusable, "Could not open file <filename>" is written
/// to stderr and the process terminates via `exit(EXIT_FAILURE)`.
inline void assure(std::ifstream& in,const char* filename="")
{
    if(in)
        return;

    // The space before %s keeps the file name from being glued to "file".
    std::fprintf(stderr,"Could not open file %s\n",filename);
    std::exit(EXIT_FAILURE);
}


/// Ensures that an output file stream is usable.
///
/// @param in        Stream to test; it is considered unusable when `!in`
///                  is true.
/// @param filename  Name included in the error message (may be empty).
///
/// When the stream is unusable, "Could not open file <filename>" is written
/// to stderr and the process terminates via `exit(EXIT_FAILURE)`.
inline void assure(std::ofstream& in,const char* filename="")
{
    if(in)
        return;

    // The space before %s keeps the file name from being glued to "file".
    std::fprintf(stderr,"Could not open file %s\n",filename);
    std::exit(EXIT_FAILURE);
}


/// Ensures that a bidirectional file stream is usable.
///
/// @param in        Stream to test; it is considered unusable when `!in`
///                  is true.
/// @param filename  Name included in the error message (may be empty).
///
/// When the stream is unusable, "Could not open file <filename>" is written
/// to stderr and the process terminates via `exit(EXIT_FAILURE)`.
inline void assure(std::fstream& in,const char* filename="")
{
    if(in)
        return;

    // The space before %s keeps the file name from being glued to "file".
    std::fprintf(stderr,"Could not open file %s\n",filename);
    std::exit(EXIT_FAILURE);
}

#endif

#ifndef REPLACEALL_H
#define REPLACEALL_H
#include <string>
using std::string;
/// Replaces every non-overlapping occurrence of `from` in `context` with
/// `to`, modifying `context` in place.
///
/// @param context  String to modify.
/// @param from     Substring to search for. An empty `from` matches at every
///                 position and could never be consumed, so in that case
///                 `context` is left unchanged.
/// @param to       Replacement text; it is never re-scanned, so a `to` that
///                 contains `from` does not cause repeated replacement.
/// @return Reference to `context`.
///
/// Declared `inline` because the definition sits in a header: without it,
/// including the header from more than one translation unit produces
/// multiple-definition link errors.
inline std::string& replaceAll(std::string& context,const std::string& from,const std::string& to)
{
    if(from.empty())
        return context;

    std::string::size_type lookHere=0;
    std::string::size_type foundHere;
    while((foundHere=context.find(from,lookHere))!=std::string::npos)
    {
        context.replace(foundHere,from.size(),to);
        // Continue after the inserted text so the replacement is not matched again.
        lookHere=foundHere+to.size();
    }
    return context;
}
#endif

#include <cstddef>
#include <cstdlib>
#include <fstream>
#include <iostream>
#include <sstream>
#include <stdexcept>
#include <string>
#include "ReplaceAll.h"
#include "require.h"
using namespace std;


/// Replaces every occurrence of `entity` (for example "&lt"), together with
/// an optional trailing ';', by the single character `replacement`.
/// The scan resumes after the inserted character, so replacement output is
/// never decoded a second time within this pass.
static void decodeHTMLEntity(std::string& text,const std::string& entity,char replacement)
{
    std::string::size_type pos=0;
    while((pos=text.find(entity,pos))!=std::string::npos)
    {
        std::string::size_type length=entity.size();
        if(pos+length<text.size() && text[pos+length]==';')
            ++length;
        text.replace(pos,length,1,replacement);
        ++pos;
    }
}

/// Removes HTML tags from `s` in place and decodes a few character entities.
///
/// Every span from a '<' up to and including the next '>' is erased. After
/// that the entities lt, gt, nbsp and amp are decoded, with or without
/// their terminating ';'.
///
/// @param s  Text to clean; modified in place.
/// @return Reference to `s`.
/// @throws std::runtime_error if a '<' has no matching '>'. The reported
///         position is an index into `s` after the preceding tags have
///         already been erased.
///
/// The dynamic exception specification of the original signature was
/// dropped because such specifications are no longer valid as of C++17.
/// A dedicated helper is used instead of replaceAll because the optional
/// ';' has to be consumed in the same pass as the entity name.
std::string& stripHTMLTags(std::string& s)
{
    std::string::size_type leftPos=0;
    // Text before leftPos is already tag-free, so each search resumes there.
    while((leftPos=s.find('<',leftPos))!=std::string::npos)
    {
        const std::string::size_type rightPos=s.find('>',leftPos+1);
        if(rightPos==std::string::npos)
        {
            std::ostringstream msg;
            msg<<"Incomplete HTML tag starting in position "
               <<leftPos;
            throw std::runtime_error(msg.str());
        }
        s.erase(leftPos,rightPos-leftPos+1);
    }

    // Decode the special HTML characters.
    decodeHTMLEntity(s,"&lt",'<');
    decodeHTMLEntity(s,"&gt",'>');
    decodeHTMLEntity(s,"&nbsp",' ');
    // "&amp" goes last so that an escaped ampersand such as "&amp;lt;"
    // yields the literal text "&lt;" rather than being decoded twice.
    decodeHTMLEntity(s,"&amp",'&');
    //Etc...
    return s;
}
/// Entry point: reads the HTML file named by the single command-line
/// argument, strips its tags and prints the remaining text to stdout.
///
/// Run from a command prompt in the executable's directory, passing the
/// input file as the only argument, e.g. `<program> test1.html`.
///
/// @return EXIT_SUCCESS when the text was printed, EXIT_FAILURE when
///         stripHTMLTags reported an incomplete tag. A wrong argument count
///         or an unopenable file terminates the process inside
///         requireArgs / assure.
int main(int argc,char* argv[])
{
    requireArgs(argc,1,"usage: HTMLStripper2 InputFile");
    std::ifstream in(argv[1]);
    assure(in,argv[1]);

    // Read the whole file into one string before processing it.
    std::ostringstream ss;
    ss<<in.rdbuf();

    try
    {
        std::string s=ss.str();
        std::cout<<stripHTMLTags(s)<<std::endl;
        return EXIT_SUCCESS;
    }
    catch(const std::runtime_error& x)
    {
        // Diagnostics go to stderr, like the messages from require.h, so
        // they do not mix with the stripped text on stdout.
        std::cerr<<x.what()<<std::endl;
        return EXIT_FAILURE;
    }
    // The former system("pause") / return 0 here could never run, because
    // both paths above return; it was removed.
}


  • 0
    点赞
  • 0
    收藏
    觉得还不错? 一键收藏
  • 0
    评论

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值