package test;
import java.io.FileReader;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.io.Reader;
import java.io.UnsupportedEncodingException;
import java.net.HttpURLConnection;
import java.net.URL;
import javax.swing.text.AttributeSet;
import javax.swing.text.Document;
import javax.swing.text.EditorKit;
import javax.swing.text.SimpleAttributeSet;
import javax.swing.text.html.HTML;
import javax.swing.text.html.HTMLDocument;
import javax.swing.text.html.HTMLEditorKit;
/**
 * Small command-line tool that parses an HTML page with the Swing HTML
 * parser and prints the {@code href} of every {@code <a>} element to
 * standard output, one per line.
 */
public class javahtml {

    /** Page that is parsed when no location is given on the command line. */
    private static final String DEFAULT_URI = "http://hexun.com/kangojian/default.html";

    /**
     * Parses an HTML page and prints the target of every link it contains.
     *
     * @param args optional; {@code args[0]} is the URL or local file name to
     *             parse. When absent, {@link #DEFAULT_URI} is used.
     * @throws Exception if the content cannot be read or parsed
     */
    public static void main(String[] args) throws Exception {
        String uri = args.length > 0 ? args[0] : DEFAULT_URI;

        EditorKit kit = new HTMLEditorKit();
        Document doc = kit.createDefaultDocument();
        // The Document class does not yet handle charsets properly, so tell it
        // to ignore charset directives; decoding is done by the Reader below.
        doc.putProperty("IgnoreCharsetDirective", Boolean.TRUE);

        // Parse the HTML, always releasing the underlying stream afterwards.
        Reader rd = getReader(uri);
        try {
            kit.read(rd, doc, 0);
        } finally {
            rd.close();
        }

        // The HTML is now stored in the document; walk all <a> elements.
        HTMLDocument.Iterator it = ((HTMLDocument) doc).getIterator(HTML.Tag.A);
        while (it.isValid()) {
            // getAttributes() is declared to return AttributeSet; do not assume
            // a particular implementation class.
            AttributeSet attrs = it.getAttributes();
            Object href = (attrs == null) ? null : attrs.getAttribute(HTML.Attribute.HREF);
            // Anchors without an href (e.g. <a name="...">) are not links.
            if (href != null) {
                System.out.println(href);
            }
            it.next();
        }
    }

    /**
     * Returns a reader on the HTML data. If {@code uri} begins with
     * {@code "http:"} or {@code "https:"} it is treated as a URL; otherwise
     * it is assumed to be a local file name.
     *
     * <p>For URLs, the charset named in the response's {@code Content-Type}
     * header is used when it is present and supported; otherwise the platform
     * default charset is used. Local files are always read with the platform
     * default charset. The caller is responsible for closing the reader.
     *
     * @param uri URL or local file name of the HTML content
     * @return a reader positioned at the start of the content
     * @throws IOException if the connection or file cannot be opened
     */
    static Reader getReader(String uri) throws IOException {
        // Retrieve from Internet.
        if (uri.startsWith("http:") || uri.startsWith("https:")) {
            HttpURLConnection conn = (HttpURLConnection) new URL(uri).openConnection();
            InputStream in = conn.getInputStream();
            String charset = charsetFromContentType(conn.getContentType());
            if (charset != null) {
                try {
                    return new InputStreamReader(in, charset);
                } catch (UnsupportedEncodingException ignored) {
                    // The server named a charset this JVM does not know;
                    // fall back to the platform default below.
                }
            }
            return new InputStreamReader(in);
        }
        // Retrieve from file.
        return new FileReader(uri);
    }

    /**
     * Extracts the value of the {@code charset} parameter from a
     * {@code Content-Type} header value such as
     * {@code "text/html; charset=UTF-8"}.
     *
     * @param contentType the header value, may be {@code null}
     * @return the charset name, or {@code null} if none is specified
     */
    private static String charsetFromContentType(String contentType) {
        if (contentType == null) {
            return null;
        }
        String[] parts = contentType.split(";");
        // parts[0] is the media type itself; parameters start at index 1.
        for (int i = 1; i < parts.length; i++) {
            String param = parts[i].trim();
            int eq = param.indexOf('=');
            if (eq > 0 && param.substring(0, eq).trim().equalsIgnoreCase("charset")) {
                String value = param.substring(eq + 1).trim();
                // The value may be a quoted string.
                if (value.length() >= 2 && value.startsWith("\"") && value.endsWith("\"")) {
                    value = value.substring(1, value.length() - 1);
                }
                return value.length() == 0 ? null : value;
            }
        }
        return null;
    }
}