-
- package net.risesoft.riseinfo.integration.parse;
- import java.io.File;
- import java.io.FileInputStream;
- import java.io.Reader;
- import java.io.StringReader;
- import java.util.ArrayList;
- import java.util.HashMap;
- import java.util.Iterator;
- import java.util.List;
- import java.util.Map;
- import java.util.Set;
- import java.util.StringTokenizer;
- import com.hothouseobjects.tags.Inspector;
- import com.hothouseobjects.tags.Tag;
- import com.hothouseobjects.tags.TagTiller;
- /*
- * 从html中将img标签的src属性解析出来,并对解析的图片地址进行处理,
- * 主要是为了解决组织部中组工网不在OA上,但信息发布是用fckedit做的,
- * 他的图片不能直接和数据一起通过webservices传到组工网上的问题
- * $author sking huang $date 2008-11-11
- */
- public class ParseHtml {
- private String htmlSrc=null;
- private Map tagList =new HashMap();
- private static org.apache.log4j.Logger log = net.risesoft.commons.log.LogFactory
- .getLog(ParseHtml.class);
- public ParseHtml(String htmlSrc){
- this.htmlSrc = htmlSrc;
- }
- //向标签中列表中增加img标签
- private void initTagToList(){
- //增加取得<a href=...的html标签
- // tagList.put("a", new String[]{"href"});
- // 增加取得<img src=...的html标签
- tagList.put("img", new String[]{"src"});
- }
- public void addTagList(String key,String[] value){
- tagList.put(key, value);
- }
- public void remove(String key){
- tagList.remove(key);
- }
- public List parse(){
- List imageName=new ArrayList();
- log.debug("********开始解析html标签***********");
- //增加标签列表
- initTagToList();
- try{
- Reader read = new StringReader(htmlSrc);
- TagTiller tagtiller = new TagTiller(read);
- tagtiller.runTiller();
- Tag thePage = tagtiller.getTilledTags();
- Set tagSet = tagList.entrySet();
- Iterator iter = tagSet.iterator();
- //从标签列表中取出要解析的标签,并将解析完的标签加入标签列表
- while(iter.hasNext())
- {
- Map.Entry entry = (Map.Entry)iter.next();
- String key = (String)entry.getKey();
- String[] value = (String[]) entry.getValue();
- if(key == null || "".equals(key)){
- continue;
- }
- if(value == null || value.length==0){
- continue;
- }
- List theHref = Inspector.collectByType(thePage,key);
- int i = theHref.size();
- while (i>0) {
- for(int ii=0;ii<value.length;ii++){
- String filterStr=filterStr(((Tag)theHref.get(i-1)).getAttributeValue(value[ii]));
- if(filterStr!=null){
- imageName.add(filterStr);
- }
- }
- i -=1;
- }
- }
- log.debug("********html标签解析完毕***********");
- }catch(Exception e){
- log.error("在解析html的过程中出现问题", e);
- }
- return imageName;
- }
- //对字符串进行过滤
- private String filterStr(String addr){
- if(addr==null) return addr;
- StringTokenizer parser =new StringTokenizer(addr,"/"///");
- String rtn="";
- //取最后一个,因为最后一个为图片的名字
- while(parser.hasMoreTokens()) {
- rtn=parser.nextToken();
- }
- return rtn;
- }
- public static void main(String[] args) {
- try {
- File file = new File("d://ttt.htm");
- int len = (int)file.length();
- byte[] b;
- b = new byte[len];
- FileInputStream fis = new FileInputStream(file);
- fis.read(b);
- fis.close();
- ParseHtml pp=new ParseHtml(new String(b));
- List list =pp.parse();
- for(int i=0;list.size()>0;i++){
- System.out.println(list.get(i));
- }
- }
- catch (Exception ex) {
- ex.printStackTrace();
- }
- }
- }
- import java.util.ArrayList;
- import java.util.List;
- public abstract class ExportImg {
- private List imgList =new ArrayList();
- private ParseHtml parseHtml =null;
- private String imgSrc = null; // 图片存放文件夹
- private String imgDest = null;// 图片将要被转移到文件夹,如果不需要转移图片可以调用两个构造函数的方法
- public ExportImg(String srcHtml){
- this(srcHtml,null,null);
- }
- public ExportImg(String srcHtml,String imgSrc){
- this(srcHtml,imgSrc,null);
- }
- public ExportImg(String srcHtml,String imgSrc,String imgDest){
- this.imgSrc=imgSrc;
- this.imgDest=imgDest;
- parseHtml =new ParseHtml(srcHtml);
- }
- public List getImgList() {
- return imgList;
- }
- public void setImgList(List imgList) {
- this.imgList = imgList;
- }
- public String export(){
- imgList = parseHtml.parse();
- return operate();
- }
- public abstract String operate();
- public String getImgDest() {
- return imgDest;
- }
- public void setImgDest(String imgDest) {
- this.imgDest = imgDest;
- }
- public String getImgSrc() {
- return imgSrc;
- }
- public void setImgSrc(String imgSrc) {
- this.imgSrc = imgSrc;
- }
- }
-
- package net.risesoft.riseinfo.integration.parse;
- import java.io.File;
- import java.io.FileInputStream;
- import java.io.FileOutputStream;
- import java.io.IOException;
- import java.util.ArrayList;
- import java.util.List;
- /*
- *将fckedit中的图片取出并导入到指定目录
- *@author sking huang
- *@2008-11-11
- */
- public class ExportImgToFile extends ExportImg {
- public ExportImgToFile(String srcHtml, String imgSrc, String imgDest) {
- super(srcHtml, imgSrc, imgDest);
- }
- private static org.apache.log4j.Logger log = net.risesoft.commons.log.LogFactory
- .getLog(ExportImgToFile.class);
- // 在将文件从指定数据源拷贝到指定目录之前请先给图片列表赋值,图片列表中只存图片名称
- public String operate() {
- for (int i = 0; i < super.getImgList().size(); i++) {
- String imageName = (String) super.getImgList().get(i);
- try {
- File file = new File(getImgSrc() + File.separator + imageName);
- //如果文件存在且是文件
- if (file.exists() && file.isFile()) {
- FileInputStream input = new FileInputStream(file);
- FileOutputStream output = new FileOutputStream(getImgDest()
- + File.separator + imageName);
- byte[] b= new byte[1024];
- int size=0;
- while ((size = input.read(b)) != -1) {
- output.write(b,0,size);
- }
- input.close();
- output.close();
- }
- } catch (IOException e) {
- log.error("文件导出过程中出现问题", e);
- }
- }
- return null;
- }
- public static void main(String[] args) {
- // 生成图片
- try {
- File file = new File("d://ttt.htm");
- int len = (int) file.length();
- byte[] b;
- b = new byte[len];
- FileInputStream fis = new FileInputStream(file);
- fis.read(b);
- fis.close();
- ExportImg eif = new ExportImgToFile(new String(b), "D://ttt.files",
- "D://img//dest");
- eif.export();
- } catch (Exception ex) {
- ex.printStackTrace();
- }
- }
- }
- package net.risesoft.riseinfo.integration.parse;
- import java.io.ByteArrayInputStream;
- import java.io.File;
- import java.io.FileInputStream;
- import java.io.FileOutputStream;
- import java.io.InputStream;
- import java.io.OutputStream;
- import javax.xml.parsers.DocumentBuilder;
- import javax.xml.parsers.DocumentBuilderFactory;
- import org.w3c.dom.Document;
- import org.w3c.dom.Node;
- import org.w3c.dom.NodeList;
- import net.risesoft.integration.adapter.AdapterUtil;
- public class ExportImgToXml extends ExportImg {
- private static org.apache.log4j.Logger log = net.risesoft.commons.log.LogFactory
- .getLog(ExportImgToXml.class);
- public ExportImgToXml(String srcHtml, String imgSrc) {
- super(srcHtml, imgSrc);
- }
- public String operate() {
- StringBuffer sb=new StringBuffer();
- sb.append("<?xml version=/"1.0/" encoding=/"GB2312/"?>");
- sb.append("<DATA>");
- for (int i = 0; i < super.getImgList().size(); i++) {
- String imageName = (String) super.getImgList().get(i);
- try {
- File file = new File(getImgSrc() + File.separator + imageName);
- //如果文件存在且是文件
- if (file.exists() && file.isFile()) {
- sb.append("<IMGLIST>");
- sb.append("<IMGNAME>" + file.getName() + "</IMGNAME>");
- FileInputStream input = new FileInputStream(file);
- byte[] b=new byte[(int)file.length()];
- input.read(b);
- sb.append("<IMGVALUE>" + AdapterUtil.base64Encode(b) +
- "</IMGVALUE>");
- sb.append("</IMGLIST>");
- input.close();
- }
- } catch (Exception e) {
- log.error("文件生成xml过程中出现问题", e);
- }
- }
- sb.append("</DATA>");
- return sb.toString();
- }
- //此方法为组工网一端接收xml的例子,只做参考用
- public void parseXml(InputStream is){
- try {
- DocumentBuilderFactory factory = DocumentBuilderFactory.newInstance();
- factory.setNamespaceAware(true);
- factory.setValidating(true);
- DocumentBuilder builder = factory.newDocumentBuilder();
- Document doc = builder.parse(is);
- NodeList attnodeList = doc
- .getElementsByTagName("IMGLIST");
- int attlength = attnodeList.getLength();
- for (int attIndex = 0; attIndex < attlength; attIndex++) { // 实际上,Attachment有0到n个
- Node attnode = attnodeList.item(attIndex);
- NodeList attlist = attnode.getChildNodes();
- String fileName=null;
- byte[] fileContent=null;
- for (int j = 0; j < attlist.getLength(); j++) {
- Node col = attlist.item(j);
- if (col.getNodeName() == null) {
- continue;
- }
- Node firstChild = col.getFirstChild();
- if (firstChild == null) {
- continue;
- }
- String value = firstChild.getNodeValue();
- if (value == null && value.length() == 0) {
- continue;
- }
- String field = col.getNodeName();
- if (field.equals("IMGVALUE")) {
- fileContent=AdapterUtil.base64Decode(value);
- } else if (field.equals("IMGNAME")) {
- fileName= value;
- }
- }
- if(fileName!=null && fileContent!=null){
- File file =new File("D://img//dest//"+fileName);
- if(!file.exists())
- file.createNewFile();
- OutputStream fos =new FileOutputStream(file);
- fos.write(fileContent);
- fos.close();
- }
- }
- log.info("**************数据写入成功********************");
- } catch (Exception ex) {
- log.error("附件写入出错了!", ex);
- }
- }
- public static void main(String[] args) {
- // 生成图片
- try {
- File file = new File("d://ttt.htm");
- int len = (int) file.length();
- byte[] b;
- b = new byte[len];
- FileInputStream fis = new FileInputStream(file);
- fis.read(b);
- fis.close();
- ExportImgToXml eif = new ExportImgToXml(new String(b), "D://ttt.files");
- String img=eif.export();
- //System.out.println(img);
- InputStream is =new ByteArrayInputStream(img.getBytes());
- eif.parseXml(is);
- } catch (Exception ex) {
- ex.printStackTrace();
- }
- }
- }
将 HTML 文件中的图片导出到某一文件夹或者生成 XML 文件
最新推荐文章于 2023-11-11 17:19:07 发布