import java.io.IOException;
import java.util.regex.*;
/**
 * Tiny page scraper: downloads the front page of {@link #WEB_ADDRESS}, searches
 * the markup for image references matching {@link #IMAGE_SRC} and prints each
 * one as an absolute URL on standard output.
 */
public class Testspider {

    /** Site whose front page is fetched; also prefixed to every matched path. */
    private static final String WEB_ADDRESS = "http://www.shua123.com";

    /**
     * Matches root-relative image references; group 1 captures the path.
     * For example it matches: src="/games_html/images/1.gif"/>
     * Compiled once because the expression never changes.
     */
    private static final Pattern IMAGE_SRC =
            Pattern.compile("src=\"(/\\w{1,6}_\\w{1,5}/\\w{1,6}/\\d\\.\\w{1,3})\"/>");

    /**
     * Fetches the page and prints one absolute URL per matched image reference.
     *
     * @param args command-line arguments (not used)
     * @throws IOException if the page cannot be opened or read
     */
    public static void main(String[] args) throws IOException {
        String html = fetchPage(WEB_ADDRESS);
        Matcher matcher = IMAGE_SRC.matcher(html);
        while (matcher.find()) {
            System.out.println(WEB_ADDRESS + matcher.group(1));
        }
    }

    /**
     * Downloads the document at {@code address} and returns its lines joined
     * together without line terminators.
     *
     * @param address URL of the page to download
     * @return the page text with all line breaks removed
     * @throws IOException if connecting or reading fails
     */
    private static String fetchPage(String address) throws IOException {
        java.net.HttpURLConnection connection =
                (java.net.HttpURLConnection) new java.net.URL(address).openConnection();
        try {
            connection.connect();
            // StringBuilder avoids re-copying the whole page on every appended line.
            StringBuilder page = new StringBuilder();
            // NOTE(review): bytes are decoded with the platform default charset, as
            // before; consider honouring the charset announced by the server.
            // try-with-resources closes the reader (and the underlying stream)
            // even when readLine() throws.
            try (java.io.BufferedReader reader = new java.io.BufferedReader(
                    new java.io.InputStreamReader(connection.getInputStream()))) {
                String line;
                while ((line = reader.readLine()) != null) {
                    // Lines are concatenated without a separator on purpose, so a
                    // reference split across a line break can still be matched.
                    page.append(line);
                }
            }
            return page.toString();
        } finally {
            // Release the connection on both the normal and the failure path.
            connection.disconnect();
        }
    }
}
打印出:
http://www.shua123.com/games_html/images/1.gif
http://www.shua123.com/games_html/images/2.gif
http://www.shua123.com/games_html/images/3.gif
http://www.shua123.com/games_html/images/4.gif
http://www.shua123.com/games_html/images/5.gif
http://www.shua123.com/games_html/images/6.gif
http://www.shua123.com/games_html/images/7.gif
http://www.shua123.com/games_html/images/8.gif
http://www.shua123.com/games_html/images/9.gif
import java.util.regex.*;
/**
 * Tiny page scraper: downloads the front page of {@link #WEB_ADDRESS}, searches
 * the markup for image references matching {@link #IMAGE_SRC} and prints each
 * one as an absolute URL on standard output.
 */
public class Testspider {

    /** Site whose front page is fetched; also prefixed to every matched path. */
    private static final String WEB_ADDRESS = "http://www.shua123.com";

    /**
     * Matches root-relative image references; group 1 captures the path.
     * For example it matches: src="/games_html/images/1.gif"/>
     * Compiled once because the expression never changes.
     */
    private static final Pattern IMAGE_SRC =
            Pattern.compile("src=\"(/\\w{1,6}_\\w{1,5}/\\w{1,6}/\\d\\.\\w{1,3})\"/>");

    /**
     * Fetches the page and prints one absolute URL per matched image reference.
     *
     * @param args command-line arguments (not used)
     * @throws IOException if the page cannot be opened or read
     */
    public static void main(String[] args) throws IOException {
        String html = fetchPage(WEB_ADDRESS);
        Matcher matcher = IMAGE_SRC.matcher(html);
        while (matcher.find()) {
            System.out.println(WEB_ADDRESS + matcher.group(1));
        }
    }

    /**
     * Downloads the document at {@code address} and returns its lines joined
     * together without line terminators.
     *
     * @param address URL of the page to download
     * @return the page text with all line breaks removed
     * @throws IOException if connecting or reading fails
     */
    private static String fetchPage(String address) throws IOException {
        java.net.HttpURLConnection connection =
                (java.net.HttpURLConnection) new java.net.URL(address).openConnection();
        try {
            connection.connect();
            // StringBuilder avoids re-copying the whole page on every appended line.
            StringBuilder page = new StringBuilder();
            // NOTE(review): bytes are decoded with the platform default charset, as
            // before; consider honouring the charset announced by the server.
            // try-with-resources closes the reader (and the underlying stream)
            // even when readLine() throws.
            try (java.io.BufferedReader reader = new java.io.BufferedReader(
                    new java.io.InputStreamReader(connection.getInputStream()))) {
                String line;
                while ((line = reader.readLine()) != null) {
                    // Lines are concatenated without a separator on purpose, so a
                    // reference split across a line break can still be matched.
                    page.append(line);
                }
            }
            return page.toString();
        } finally {
            // Release the connection on both the normal and the failure path.
            connection.disconnect();
        }
    }
}
打印出:
http://www.shua123.com/games_html/images/1.gif
http://www.shua123.com/games_html/images/2.gif
http://www.shua123.com/games_html/images/3.gif
http://www.shua123.com/games_html/images/4.gif
http://www.shua123.com/games_html/images/5.gif
http://www.shua123.com/games_html/images/6.gif
http://www.shua123.com/games_html/images/7.gif
http://www.shua123.com/games_html/images/8.gif
http://www.shua123.com/games_html/images/9.gif