https://blog.csdn.net/sam372648886gz/article/details/79374094
1.抓取登录页面后的页面
https://www.cnblogs.com/huihui123/p/7770533.html
思路:1,登录。2,登录成功后从响应头(Set-Cookie)中获得cookie。3,将cookie放到请求头中,向登录后才能访问的页面发送请求。
1.获得URL对象
URL url = new URL(urlString);
2.打开连接
HttpURLConnection urlConn = (HttpURLConnection) url.openConnection();
3.向request中注入cookie
urlConn.setRequestProperty("Cookie", cookie);
4.创建字符缓冲输入流(BufferedReader)
BufferedReader br = new BufferedReader(new InputStreamReader(urlConn.getInputStream(), charset));
/**
 * Small demo of fetching a page that is only available after logging in:
 * POST the credentials, collect the cookies the server sets on the response,
 * then send those cookies back on a second request for the protected page.
 */
public class CrawTest {

    /**
     * Downloads the body of {@code urlString} as text.
     *
     * @param urlString address of the page to fetch
     * @param charset   name of the charset used to decode the response body
     * @param cookie    value for the {@code Cookie} request header; when
     *                  {@code null} or empty no {@code Cookie} header is sent
     * @return the response body with each line terminated by {@code '\n'};
     *         whatever was read so far (possibly empty) if an I/O error occurs
     */
    private static String getHtml(String urlString, String charset, String cookie) {
        StringBuilder html = new StringBuilder();
        try {
            URL url = new URL(urlString);
            HttpURLConnection urlConn = (HttpURLConnection) url.openConnection();
            if (cookie != null && !cookie.isEmpty()) {
                urlConn.setRequestProperty("Cookie", cookie);
            }
            // try-with-resources closes the reader (and the underlying stream)
            // even when reading fails part-way through.
            try (BufferedReader br = new BufferedReader(
                    new InputStreamReader(urlConn.getInputStream(), charset))) {
                String line;
                while ((line = br.readLine()) != null) {
                    // readLine() strips the terminator; put one back so the
                    // lines of the page source are not glued together.
                    html.append(line).append('\n');
                }
            }
        } catch (IOException e) {
            // MalformedURLException is an IOException, so one handler covers both.
            e.printStackTrace();
        }
        return html.toString();
    }

    /**
     * Sends {@code params} as the body of a POST request and returns the
     * cookies set by the response.
     *
     * @param urlString address to post to
     * @param params    request body, e.g. {@code "username=a&password=b"};
     *                  {@code null} is sent as an empty body
     * @param charset   name of the charset used to encode {@code params}
     * @return a value usable as a {@code Cookie} request header
     *         ({@code "name=value; name2=value2"}), or {@code null} when the
     *         response set no cookies or an I/O error occurred
     */
    private static String postGetCookie(String urlString, String params, String charset) {
        String cookies = null;
        try {
            URL url = new URL(urlString);
            URLConnection urlConn = url.openConnection();
            urlConn.setDoInput(true);
            urlConn.setDoOutput(true);
            String body = (params == null) ? "" : params;
            try (java.io.OutputStream out = urlConn.getOutputStream()) {
                out.write(body.getBytes(charset));
            }
            // Header names are case-insensitive and a response may carry several
            // Set-Cookie headers, so gather every matching value.
            java.util.List<String> setCookieValues = new java.util.ArrayList<String>();
            for (java.util.Map.Entry<String, java.util.List<String>> header
                    : urlConn.getHeaderFields().entrySet()) {
                if ("Set-Cookie".equalsIgnoreCase(header.getKey()) && header.getValue() != null) {
                    setCookieValues.addAll(header.getValue());
                }
            }
            cookies = buildCookieHeader(setCookieValues);
        } catch (IOException e) {
            // MalformedURLException is an IOException, so one handler covers both.
            e.printStackTrace();
        }
        return cookies;
    }

    /**
     * Turns {@code Set-Cookie} response header values into a single
     * {@code Cookie} request header value. Only the leading
     * {@code name=value} pair of each entry is kept; attributes after the
     * first {@code ';'} (such as {@code Path} or {@code HttpOnly}) are dropped.
     * Package-private so it can be exercised without a network connection.
     *
     * @param setCookieValues raw {@code Set-Cookie} header values; may be
     *                        {@code null} and may contain {@code null} entries
     * @return the pairs joined with {@code "; "}, or {@code null} when there
     *         are none
     */
    static String buildCookieHeader(java.util.List<String> setCookieValues) {
        if (setCookieValues == null) {
            return null;
        }
        StringBuilder header = new StringBuilder();
        for (String setCookie : setCookieValues) {
            if (setCookie == null) {
                continue;
            }
            int attributesStart = setCookie.indexOf(';');
            String pair = (attributesStart < 0 ? setCookie : setCookie.substring(0, attributesStart)).trim();
            if (pair.isEmpty()) {
                continue;
            }
            if (header.length() > 0) {
                header.append("; ");
            }
            header.append(pair);
        }
        return header.length() == 0 ? null : header.toString();
    }

    /**
     * Logs in to the demo application, then prints the source of a page
     * requested with the cookies obtained from the login response.
     *
     * @param args unused
     */
    public static void main(String[] args) {
        String cookie = postGetCookie("http://localhost:8080/loginDemo/login",
                "username=admin&password=123456", "utf-8");
        String html = getHtml("http://localhost:8080/loginDemo/index.jsp", "utf-8", cookie);
        System.out.println(html); // prints the source of the page requested with the login cookies
    }
}