提取网页的图片链接的Java程序

最新推荐文章于 2023-10-25 22:48:48 发布

weixin_30532369

最新推荐文章于 2023-10-25 22:48:48 发布

阅读量92

点赞数

文章标签： java

原文链接：http://www.cnblogs.com/xiaoyz/archive/2008/06/09/1216271.html

版权

输入网页文件名和资源列表文件名，
输出资源列表文件供迅雷下载。
适用于批量下载图片。
由两个文件组成。
没有提供网页下载功能，因为我没有时间写，相关的代码以后再贴。

// AnalizeIMG.java
2

// 主程序
4

import java.io.BufferedReader;
6

import java.io.File;
7

import java.io.FileReader;
8

import java.io.FileWriter;
9

import java.io.IOException;
10

public class AnalizeIMG

{
13

public void p(String s)
15

{
16

System.out.println(s);
17

}
18

public void analizeFile(String infile,String outfile) throws Exception
20

{
21

File file = new File(infile);
22

if (file == null || !file.exists())

{
23

p("File " + infile + " not exits !");
24

}
25

if (!file.canRead())

{
27

p("File " + infile + " can't read !");
28

}
30

String strLine = null;
32

FileReader frd = new FileReader(infile);
33

BufferedReader bufferedReader = new BufferedReader(frd);
34

try

{
35

AnalizeWebParse parse = new AnalizeWebParse();
36

String s = parse.parse(bufferedReader);
37

createFile(outfile,s);
39

} catch (Exception ex)

{
41

throw ex;
42

} finally

{
43

frd.close();
44

bufferedReader.close();
45

}
46

}
47

private void createFile(String filename, String content)

{
49

FileWriter f = null;
50

try

{
51

f = new FileWriter(filename);
52

if (f == null || content == null)

{
53

return;
54

}
55

f.write(content);
57

f.flush();
58

f.close();
59

} catch (Exception e)

{
61

} finally

{
63

if (f != null)

{
64

try

{
65

f.close();
66

} catch (Exception e)

{
67

}
69

}
70

}
71

}
72

public static void main(String arg[])
74

{
75

AnalizeIMG ana = new AnalizeIMG();
76

try

{
77

ana.analizeFile("E:\\1.txt","E:\\out.lst");
78

}catch (Exception ex)

{
79

ex.printStackTrace();
80

}
81

}
82

}
83

第二个文件是解析文件

// AnalizeWebParse.java
2

// 网页分析代码，需要用户根据自己需要做适当修改
4

import java.io.BufferedReader;
6

import java.io.StringReader;
7

import java.util.regex.Pattern;
8

import javax.swing.text.MutableAttributeSet;
10

import javax.swing.text.html.HTML;
11

import javax.swing.text.html.HTMLEditorKit.ParserCallback;
12

import javax.swing.text.html.parser.ParserDelegator;
13

/**
 * Parser callback that collects the {@code src} attribute of {@code <img>}
 * tags while an HTML document is being parsed.
 *
 * <p>Only absolute {@code http://} and {@code https://} addresses are kept;
 * each one is appended to the result followed by a newline.
 *
 * <p>The callback also records whether a {@code <div>} whose {@code class}
 * attribute contains {@code "body"} has been opened and closed.
 */
public class AnalizeWebParse extends ParserCallback
{
    /** Matches an absolute http or https URL; compiled once and reused. */
    private static final Pattern ABSOLUTE_URL = Pattern.compile("^(https?://.+)");

    /** Accumulates the accepted image URLs, one per line. */
    StringBuffer sb = new StringBuffer();

    /** Set once a {@code <div>} whose class contains "body" has been opened. */
    boolean start = false;

    /** Set once a {@code </div>} has been seen after {@link #start} was set. */
    boolean finished = false;

    /**
     * Prints a message to standard output.
     *
     * @param s the text to print
     */
    public void p(String s)
    {
        System.out.println(s);
    }

    /**
     * Marks the start of the content area when a {@code <div>} whose
     * {@code class} attribute contains {@code "body"} is opened.
     *
     * @param tag     the tag being opened
     * @param attribs the attributes of the tag
     * @param pos     position of the tag in the document
     */
    @Override
    public void handleStartTag(HTML.Tag tag, MutableAttributeSet attribs,
            int pos)
    {
        // Nothing left to detect once the area was found or already closed.
        if (finished || start || tag != HTML.Tag.DIV)
        {
            return;
        }

        String cla = (String) attribs.getAttribute(HTML.Attribute.CLASS);
        if (cla != null && cla.indexOf("body") != -1)
        {
            start = true;
        }
    }

    /**
     * Marks the content area as finished at the first {@code </div>} seen
     * after it was started.
     *
     * @param tag the tag being closed
     * @param pos position of the tag in the document
     */
    @Override
    public void handleEndTag(HTML.Tag tag, int pos)
    {
        if (tag == HTML.Tag.DIV && start && !finished)
        {
            finished = true;
        }
    }

    /**
     * Ignores text content; only tags are of interest.
     *
     * @param text the text found
     * @param pos  position of the text in the document
     */
    @Override
    public void handleText(char[] text, int pos)
    {
    }

    /**
     * Records the {@code src} of an {@code <img>} tag when it is an absolute
     * http or https URL.
     *
     * <p>NOTE(review): {@link #start} and {@link #finished} are not consulted
     * here, so images anywhere in the document are collected. Confirm whether
     * collection was meant to be limited to the "body" div.
     *
     * @param t   the tag encountered
     * @param a   the attributes of the tag
     * @param pos position of the tag in the document
     */
    @Override
    public void handleSimpleTag(HTML.Tag t, MutableAttributeSet a, int pos)
    {
        if (t != HTML.Tag.IMG)
        {
            return;
        }

        String src = (String) a.getAttribute(HTML.Attribute.SRC);
        if (src != null && ABSOLUTE_URL.matcher(src).matches())
        {
            sb.append(src).append("\n");
        }
    }

    /**
     * Parses an HTML document and returns the image URLs found in it.
     *
     * <p>State left over from an earlier call is cleared first, so one
     * instance can be used for several documents without the results of one
     * leaking into the next.
     *
     * @param file reader positioned at the start of the HTML document
     * @return the accepted URLs, each followed by a newline, or {@code null}
     *         when {@code file} is {@code null}
     * @throws Exception if the underlying parser fails
     */
    public String parse(BufferedReader file) throws Exception
    {
        if (file == null)
        {
            return null;
        }

        // Start from a clean slate for every document.
        sb.setLength(0);
        start = false;
        finished = false;

        ParserDelegator pd = new ParserDelegator();
        pd.parse(file, this, true);

        return sb.toString();
    }
}
90

转载于:https://www.cnblogs.com/xiaoyz/archive/2008/06/09/1216271.html

weixin_30532369

关注

0
点赞
踩
0

收藏

觉得还不错? 一键收藏
0
评论
提取网页的图片链接的Java程序

输入网页文件名，和资源列表文件名输出资源列表文件供迅雷下载。适用于批量下载图片。由两个文件组成。没有提供网页下载功能，因为我没有时间写，相关的代码以后再贴。1//AnalizeIMG.java23//主程序45importjava.io.BufferedReader;6importjava.io.File;7importjava....
复制链接

扫一扫