SAX解析器为有效URL抛出403异常,该异常在浏览器中加载时没有任何问题。我们能够用下面的示例代码重现问题。指针表示感谢。
import javax.xml.parsers.SAXParser;
import javax.xml.parsers.SAXParserFactory;
import org.xml.sax.helpers.DefaultHandler;
public class StackOverflow {
public static void main(String[] args) {
SAXParserFactory factory2 = SAXParserFactory.newInstance();
try {
String uri;
uri = "https://www.sec.gov/Archives/edgar/data/1326801/000132680119000009/FilingSummary.xml"; // Does not work
// uri = "https://www.w3schools.com/xml/simple.xml"; // ** Works if this line is uncommented **
SAXParser saxParser = factory2.newSAXParser();
DefaultHandler eventHandler = new DefaultHandler();
saxParser.parse(uri, eventHandler);
// ...
// Business Logic
// ...
System.out.println("SUCCESS");
}
catch (Exception e1) {
// TODO Auto-generated catch block
e1.printStackTrace();
}
}
}
stacktrace如下:
java.io.IOException: Server returned HTTP response code: 403 for URL: https://www.sec.gov/Archives/edgar/data/1326801/000132680119000009/FilingSummary.xml
at sun.net.www.protocol.http.HttpURLConnection.getInputStream0(Unknown Source)
at sun.net.www.protocol.http.HttpURLConnection.getInputStream(Unknown Source)
at sun.net.www.protocol.https.HttpsURLConnectionImpl.getInputStream(Unknown Source)
at com.sun.org.apache.xerces.internal.impl.XMLEntityManager.setupCurrentEntity(Unknown Source)
at com.sun.org.apache.xerces.internal.impl.XMLVersionDetector.determineDocVersion(Unknown Source)
at com.sun.org.apache.xerces.internal.parsers.XML11Configuration.parse(Unknown Source)
at com.sun.org.apache.xerces.internal.parsers.XML11Configuration.parse(Unknown Source)
at com.sun.org.apache.xerces.internal.parsers.XMLParser.parse(Unknown Source)
at com.sun.org.apache.xerces.internal.parsers.AbstractSAXParser.parse(Unknown Source)
at com.sun.org.apache.xerces.internal.jaxp.SAXParserImpl$JAXPSAXParser.parse(Unknown Source)
at com.sun.org.apache.xerces.internal.jaxp.SAXParserImpl.parse(Unknown Source)
at javax.xml.parsers.SAXParser.parse(Unknown Source)
at StackOverflow.main(StackOverflow.java:15)