问题:使用jsoup爬虫设置代理报错。
解决:端口原来为int类型,改为String类型即可。
@Test
public void internetTest() throws Exception{
System.setProperty("http.maxRedirects", "50");
System.getProperties().setProperty("proxySet", "true");
System.getProperties().put("https.proxyHost", "proxy.piccnet.com.cn");
System.getProperties().put("https.proxyPort", "3128");//注意端口为String类型。
String agent="Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko)Chrome/56.0.2924.87 Safari/537.36" ;
Document doc = Jsoup.connect("https://www.zhihu.com/question/21832486")
.userAgent(agent)
.ignoreHttpErrors(true)//这个很重要 否则会报HTTP error fetching URL. Status=404
.timeout(3000).get();
if (doc!=null) {
System.err.println(doc.body().html());
}
}
java.net.SocketException: Unexpected end of file from server
at sun.net.www.http.HttpClient.parseHTTPHeader(HttpClient.java:782)
at sun.net.www.http.HttpClient.parseHTTP(HttpClient.java:641)
at sun.net.www.protocol.http.HttpURLConnection.doTunneling(HttpURLConnection.java:1618)
at sun.net.www.protocol.https.AbstractDelegateHttpsURLConnection.connect(AbstractDelegateHttpsURLConnection.java:164)
at sun.net.www.protocol.https.HttpsURLConnectionImpl.connect(HttpsURLConnectionImpl.java:133)
at org.jsoup.helper.HttpConnection$Response.execute(HttpConnection.java:425)
at org.jsoup.helper.HttpConnection$Response.execute(HttpConnection.java:410)
at org.jsoup.helper.HttpConnection.execute(HttpConnection.java:164)
at org.jsoup.helper.HttpConnection.get(HttpConnection.java:153)
at jsoup.JavaInterviewQuestions.JavaInterviewTi.internetTest(JavaInterviewTi.java:47)
at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
at sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:39)
at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:25)
at java.lang.reflect.Method.invoke(Method.java:597)
at org.junit.runners.model.FrameworkMethod$1.runReflectiveCall(FrameworkMethod.java:47)
at org.junit.internal.runners.model.ReflectiveCallable.run(ReflectiveCallable.java:12)
at org.junit.runners.model.FrameworkMethod.invokeExplosively(FrameworkMethod.java:44)
at org.junit.internal.runners.statements.InvokeMethod.evaluate(InvokeMethod.java:17)
at org.junit.runners.ParentRunner.runLeaf(ParentRunner.java:271)
at org.junit.runners.BlockJUnit4ClassRunner.runChild(BlockJUnit4ClassRunner.java:70)
at org.junit.runners.BlockJUnit4ClassRunner.runChild(BlockJUnit4ClassRunner.java:50)
at org.junit.runners.ParentRunner$3.run(ParentRunner.java:238)
at org.junit.runners.ParentRunner$1.schedule(ParentRunner.java:63)
at org.junit.runners.ParentRunner.runChildren(ParentRunner.java:236)
at org.junit.runners.ParentRunner.access$000(ParentRunner.java:53)
at org.junit.runners.ParentRunner$2.evaluate(ParentRunner.java:229)
at org.junit.runners.ParentRunner.run(ParentRunner.java:309)
at org.eclipse.jdt.internal.junit4.runner.JUnit4TestReference.run(JUnit4TestReference.java:50)
at org.eclipse.jdt.internal.junit.runner.TestExecution.run(TestExecution.java:38)
at org.eclipse.jdt.internal.junit.runner.RemoteTestRunner.runTests(RemoteTestRunner.java:467)
at org.eclipse.jdt.internal.junit.runner.RemoteTestRunner.runTests(RemoteTestRunner.java:683)
at org.eclipse.jdt.internal.junit.runner.RemoteTestRunner.run(RemoteTestRunner.java:390)
at org.eclipse.jdt.internal.junit.runner.RemoteTestRunner.main(RemoteTestRunner.java:197)