// Downloads the images found in a web page; with minor changes it can serve as a general web-page content scraper.
package com.pattern.demo;
import java.io.BufferedInputStream;
import java.io.BufferedReader;
import java.io.File;
import java.io.FileOutputStream;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.net.URL;
/**
 * Downloads the images referenced by a photo-gallery web page.
 *
 * <p>The page is read line by line. Every line that starts with
 * {@code <i title="img">} is treated as carrying exactly one image URL; that
 * image is fetched and stored in {@code c:/pic/} under the last path segment
 * of its URL.
 */
public class Test1 {

    /** Page scraped when the caller does not supply one. */
    private static final String DEFAULT_PAGE_URL =
            "http://fashion.163.com/photoview/43AJ0026/14309.html";

    /** Opening tag that marks a line carrying an image URL. */
    private static final String IMG_OPEN_TAG = "<i title=\"img\">";

    /** Closing tag stripped from an image line. */
    private static final String IMG_CLOSE_TAG = "</i>";

    /** Directory the downloaded images are written to. */
    private static final String TARGET_DIR = "c:/pic/";

    /** Size of the copy buffer, in bytes. */
    private static final int BUFFER_SIZE = 8192;

    /**
     * Command-line entry point.
     *
     * @param args optional; {@code args[0]} is the URL of the page to scrape.
     *             Without it the built-in default page is used.
     * @throws Exception if the page or any image cannot be read or stored
     */
    public static void main(String[] args) throws Exception {
        Test1.test(args.length > 0 ? args[0] : "");
    }

    /**
     * Scrapes one page and downloads every image it lists.
     *
     * <p>For each image the method prints {@code <index>:<url>}, downloads the
     * file, then prints {@code ...done}; {@code all done} is printed once the
     * whole page has been processed.
     *
     * @param args URL of the page to scrape; {@code null} or an empty string
     *             selects the built-in default page
     * @throws Exception if the page URL is malformed, or the page or any image
     *                   cannot be read or written
     */
    public static void test(String args) throws Exception {
        String pageUrl = (args == null || args.isEmpty()) ? DEFAULT_PAGE_URL : args;
        int index = 0;

        // try-with-resources closes the page stream even when a download fails.
        // NOTE(review): the reader uses the platform default charset, as before;
        // this is harmless as long as the image URLs are plain ASCII.
        try (InputStream pageStream = new URL(pageUrl).openStream();
             BufferedReader reader = new BufferedReader(new InputStreamReader(pageStream))) {
            String line;
            while ((line = reader.readLine()) != null) {
                if (!line.startsWith(IMG_OPEN_TAG)) {
                    continue;
                }
                String picUrl = extractImageUrl(line);
                System.out.print(index++ + ":" + picUrl);
                download(picUrl);
                System.out.println("...done");
            }
        }
        System.out.println("all done");
    }

    /** Strips the image tags from a matching line, leaving only the image URL. */
    private static String extractImageUrl(String line) {
        // Literal replacement: the tags contain no regex metacharacters.
        return line.replace(IMG_OPEN_TAG, "").replace(IMG_CLOSE_TAG, "");
    }

    /** Fetches one image and stores it in the target directory under its URL's file name. */
    private static void download(String picUrl) throws Exception {
        String fileName = picUrl.substring(picUrl.lastIndexOf('/') + 1);

        File targetDir = new File(TARGET_DIR);
        // Best effort: if the directory still does not exist afterwards, the
        // FileOutputStream constructor below reports the failure.
        targetDir.mkdirs();

        try (InputStream in = new URL(picUrl).openStream();
             FileOutputStream out = new FileOutputStream(new File(targetDir, fileName))) {
            // Block-wise copy instead of one write call per byte.
            byte[] buffer = new byte[BUFFER_SIZE];
            int read;
            while ((read = in.read(buffer)) != -1) {
                out.write(buffer, 0, read);
            }
        }
    }
}