1.获取baidu图片链接
首先测试一下能否获取到 HTTP 响应内容:
/**
 * Downloads the Baidu home page, saves it to {@code 1.txt} in the working
 * directory and echoes it to standard output.
 */
public class baidu {
    /** Page that {@link #main(String[])} downloads. */
    private static final String PAGE_URL = "http://www.baidu.com";

    /** File the downloaded page is written to. */
    private static final String OUTPUT_FILE = "1.txt";

    /**
     * Charset used for both decoding the response and writing the file.
     * NOTE(review): assumes the page is served as UTF-8 -- confirm against the
     * response's Content-Type header if other sites are fetched.
     */
    private static final java.nio.charset.Charset UTF_8 =
            java.nio.charset.StandardCharsets.UTF_8;

    /**
     * Fetches {@link #PAGE_URL}, writes it to {@link #OUTPUT_FILE} and prints it.
     * Any failure is reported on standard output together with a stack trace.
     *
     * @param args ignored
     */
    public static void main(String[] args) {
        try {
            // Turn the string into a URL and open the connection to it.
            URL realUrl = new URL(PAGE_URL);
            URLConnection connection = realUrl.openConnection();
            connection.connect();

            // try-with-resources closes both streams even when reading fails.
            try (BufferedReader in = new BufferedReader(
                         new InputStreamReader(connection.getInputStream(), UTF_8));
                 BufferedWriter out = java.nio.file.Files.newBufferedWriter(
                         java.nio.file.Paths.get(OUTPUT_FILE), UTF_8)) {
                String page = copyLines(in, out);
                System.out.print(page);
            }
        } catch (Exception e) {
            System.out.println("发送GET请求出现异常" + e);
            e.printStackTrace();
        }
    }

    /**
     * Copies every line from {@code in} to {@code out} exactly once and returns
     * the copied text.
     *
     * <p>Package-private so the copy logic can be exercised without network access.
     *
     * @param in  source to read lines from; not closed by this method
     * @param out destination; each line is followed by the platform line
     *            separator, and the writer is flushed (not closed) before returning
     * @return all lines read, each terminated by {@code '\n'}
     * @throws java.io.IOException if reading or writing fails
     */
    static String copyLines(BufferedReader in, BufferedWriter out) throws java.io.IOException {
        StringBuilder page = new StringBuilder();
        String line;
        while ((line = in.readLine()) != null) {
            page.append(line).append('\n');
            // Write only the current line; writing the accumulated text here
            // would repeat all earlier lines on every iteration.
            out.write(line);
            out.newLine();
        }
        out.flush();
        return page.toString();
    }
}
接下来使用正则表达式来获取响应内容中的必要信息。
下面这部分代码用于获取百度图片链接:
import java.io.BufferedReader;
import java.io.IOException;
import java.io.InputStreamReader;
import java.net.PasswordAuthentication;
import java.net.URL;
import java.net.URLConnection;
import java.nio.charset.StandardCharsets;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
/**
 * Created by dell on 2016/7/29.
 *
 * <p>Fetches the Baidu home page and extracts an image link from it with a
 * regular expression.
 */
public class Get_Baidu_1 {
    /** Page that {@link #main(String[])} fetches. */
    private static final String PAGE_URL = "http://www.baidu.com";

    /**
     * Pattern used by {@link #main(String[])} to locate a protocol-relative
     * {@code src=//host/dir/file.ext} image link. The dots are unescaped, so
     * each one matches any single character, not only a literal {@code '.'}.
     */
    private static final String IMG_SRC_REGEX =
            "src=\\/\\/(\\w{3}).(\\w+).(\\w+)\\/(\\w+)\\/([a-zA-Z_]+).(\\w+)";

    /**
     * Issues a GET request and returns the response body with its lines
     * concatenated (line terminators are dropped).
     *
     * <p>This method does not throw on failure: the error is reported on
     * standard output and whatever was read so far (possibly the empty string)
     * is returned.
     *
     * @param url address to fetch
     * @return the response text, or the part read before a failure
     */
    public static String sendGet(String url) {
        StringBuilder page = new StringBuilder();
        try {
            URLConnection connection = new URL(url).openConnection();
            connection.connect();
            // NOTE(review): assumes the response is UTF-8 -- confirm for other sites.
            try (BufferedReader in = new BufferedReader(
                    new InputStreamReader(connection.getInputStream(), StandardCharsets.UTF_8))) {
                String line;
                while ((line = in.readLine()) != null) {
                    page.append(line);
                }
            }
        } catch (IOException | RuntimeException e) {
            // Keep the "never throws" contract, but report the cause instead of hiding it.
            System.out.println("GET失败: " + e);
        }
        // Returning here rather than from a finally block, so that an Error
        // raised while reading is not silently discarded.
        return page.toString();
    }

    /**
     * Returns the first substring of {@code targetSrc} that matches {@code patternSrc}.
     *
     * @param targetSrc  text to search
     * @param patternSrc regular expression to search for
     * @return the whole first match, or the literal string {@code "Nothing"}
     *         when there is no match
     * @throws java.util.regex.PatternSyntaxException if {@code patternSrc} is not a valid regex
     */
    public static String RegxString(String targetSrc, String patternSrc) {
        Matcher matcher = Pattern.compile(patternSrc).matcher(targetSrc);
        return matcher.find() ? matcher.group() : "Nothing";
    }

    /**
     * Fetches {@link #PAGE_URL}, prints the page, then prints the first image
     * link found in it.
     *
     * @param args ignored
     */
    public static void main(String[] args) {
        String result = sendGet(PAGE_URL);
        System.out.println(result);
        String imgSrc = RegxString(result, IMG_SRC_REGEX);
        System.out.println("image = " + imgSrc);
    }
}
这样就完成了百度图片链接的获取。