android——网页解析SAXParser

本代码实现的是对某一网页的解析,主要代码为:

// Address of the XML document to parse.
URL url = new URL("http://www./partner/tv/lau/songlist_hot.jsp?ps=9&si=1");

// Obtain an XMLReader through the standard JAXP factory chain.
// Equivalent one-liner:
//   reader = SAXParserFactory.newInstance().newSAXParser().getXMLReader();
SAXParserFactory factory = SAXParserFactory.newInstance();
SAXParser parser = factory.newSAXParser();
XMLReader reader = parser.getXMLReader();

// Every SAX callback is delivered to our custom handler.
reader.setContentHandler(new MyContentHandler());

// Open the connection last and close it in all cases, so the network stream
// is not leaked when parsing fails half-way through.
InputSource source = new InputSource(url.openStream());
try {
    reader.parse(source);
} finally {
    source.getByteStream().close();
}

MyContentHandler 是我们自定义的一个继承自 DefaultHandler 的类,一般重写以下五个方法:
    /**
     * Callback for the beginning of the document.
     * This stub adds no behavior of its own; it only forwards to the
     * {@code DefaultHandler} implementation.
     *
     * @throws SAXException if the superclass implementation reports an error
     */
    @Override
    public void startDocument() throws SAXException {
        super.startDocument();
    }

    /**
     * Callback for an opening tag.
     * This stub adds no behavior of its own; it only forwards to the
     * {@code DefaultHandler} implementation.
     *
     * @param uri        namespace URI of the element
     * @param localName  element name without prefix
     * @param qName      qualified (prefixed) element name
     * @param attributes attributes written inside the opening tag
     * @throws SAXException if the superclass implementation reports an error
     */
    @Override
    public void startElement(String uri, String localName, String qName, Attributes attributes)
            throws SAXException {
        super.startElement(uri, localName, qName, attributes);
    }

    

    /**
     * Callback for a chunk of character data; the chunk is the slice
     * {@code ch[start .. start + length)}.
     * This stub adds no behavior of its own; it only forwards to the
     * {@code DefaultHandler} implementation.
     *
     * @param ch     buffer holding the characters
     * @param start  index of the first character of the chunk
     * @param length number of characters in the chunk
     * @throws SAXException if the superclass implementation reports an error
     */
    @Override
    public void characters(char[] ch, int start, int length) throws SAXException {
        super.characters(ch, start, length);
    }


    /**
     * Callback for a closing tag.
     * This stub adds no behavior of its own; it only forwards to the
     * {@code DefaultHandler} implementation.
     *
     * @param uri       namespace URI of the element
     * @param localName element name without prefix
     * @param qName     qualified (prefixed) element name
     * @throws SAXException if the superclass implementation reports an error
     */
    @Override
    public void endElement(String uri, String localName, String qName) throws SAXException {
        super.endElement(uri, localName, qName);
    }


    /**
     * Callback for the end of the document.
     * This stub adds no behavior of its own; it only forwards to the
     * {@code DefaultHandler} implementation.
     *
     * @throws SAXException if the superclass implementation reports an error
     */
    @Override
    public void endDocument() throws SAXException {
        super.endDocument();
    }

例如,我们对下面这个网页的内容进行解析,分别取出 "卡拉OK演示歌曲"、"最新歌曲"、"http://www./partner/tv/lau/songlist_last.jsp?ps=9&si=1" 等内容。

<?xml version="1.0" encoding="UTF-8"?>
<menulist version="1.0"> 
  <parent title="卡拉OK演示歌曲" id="0"/>
    <menusets> 
    <menu id="0"> 
	<name>最新歌曲</name> 
	<menulink>http://www./partner/tv/lau/songlist_last.jsp?ps=9&amp;si=1</menulink>
    </menu> 
    <menu id="1"> 
        <name>最热歌曲</name> 
	<menulink>http://www./partner/tv/lau/songlist_hot.jsp?ps=9&amp;si=1</menulink>
    </menu> 
    <menu id="2"> 
	<name>总排行</name> 
	<menulink>http://www./partner/tv/lau/songlist_top.jsp?ps=9&amp;si=1</menulink> 
    </menu>
    <menu id="3">
	<name>最热艺人</name> 
	<menulink>http://www./partner/tv/lau/starlist_top.jsp?ps=9&amp;si=1</menulink> 
    </menu>
  </menusets> 
</menulist>

 

通过下面的程序解析的结果如下:

05-02 02:28:51.687: D/DEBUG(888): startElement localName:                    menulist
05-02 02:28:51.687: D/DEBUG(888): startElement attributes.getLocalName:      version
05-02 02:28:51.687: D/DEBUG(888): startElement attributes.getValue:          1.0
05-02 02:28:51.687: D/DEBUG(888): characters: 
05-02 02:28:51.687: D/DEBUG(888): startElement localName:                    parent
05-02 02:28:51.687: D/DEBUG(888): startElement attributes.getLocalName:      id
05-02 02:28:51.687: D/DEBUG(888): startElement attributes.getValue:          0 
05-02 02:28:51.687: D/DEBUG(888): startElement attributes.getLocalName:      title
05-02 02:28:51.687: D/DEBUG(888): startElement attributes.getValue:          卡拉OK演示歌曲
05-02 02:28:51.687: D/DEBUG(888): startElement endElement:                   parent
05-02 02:28:51.687: D/DEBUG(888): characters: 
05-02 02:28:51.697: D/DEBUG(888): startElement localName:                    menusets
05-02 02:28:51.697: D/DEBUG(888): characters: 
05-02 02:28:51.697: D/DEBUG(888): startElement localName:                    menu
05-02 02:28:51.697: D/DEBUG(888): startElement attributes.getLocalName:      id
05-02 02:28:51.697: D/DEBUG(888): startElement attributes.getValue:          0 
05-02 02:28:51.697: D/DEBUG(888): characters: 
05-02 02:28:51.697: D/DEBUG(888): startElement localName:                    name
05-02 02:28:51.697: D/DEBUG(888): characters:                                最新歌曲
05-02 02:28:51.697: D/DEBUG(888): startElement endElement:                   name
05-02 02:28:51.697: D/DEBUG(888): characters: 
05-02 02:28:51.697: D/DEBUG(888): startElement localName:                    menulink
05-02 02:28:51.697: D/DEBUG(888): characters:                                http://www./partner/tv/lau/songlist_last.jsp?ps=9
05-02 02:28:51.707: D/DEBUG(888): characters:                                & 
05-02 02:28:51.707: D/DEBUG(888): characters:                                si=1
05-02 02:28:51.707: D/DEBUG(888): startElement endElement:                   menulink
05-02 02:28:51.707: D/DEBUG(888): characters: 
05-02 02:28:51.707: D/DEBUG(888): startElement endElement:                   menu
05-02 02:28:51.707: D/DEBUG(888): characters: 
05-02 02:28:51.707: D/DEBUG(888): startElement localName:                    menu
05-02 02:28:51.707: D/DEBUG(888): startElement attributes.getLocalName:      id
05-02 02:28:51.707: D/DEBUG(888): startElement attributes.getValue:          1 
05-02 02:28:51.707: D/DEBUG(888): characters: 
05-02 02:28:51.707: D/DEBUG(888): startElement localName:                    name
05-02 02:28:51.707: D/DEBUG(888): characters:                                最热歌曲
05-02 02:28:51.707: D/DEBUG(888): startElement endElement:                   name
05-02 02:28:51.707: D/DEBUG(888): characters: 
05-02 02:28:51.707: D/DEBUG(888): startElement localName:                    menulink
05-02 02:28:51.717: D/DEBUG(888): characters:                                http://www./partner/tv/lau/songlist_hot.jsp?ps=9
05-02 02:28:51.717: D/DEBUG(888): characters:                                &
05-02 02:28:51.717: D/DEBUG(888): characters:                                si=1
05-02 02:28:51.717: D/DEBUG(888): startElement endElement:                   menulink
05-02 02:28:51.717: D/DEBUG(888): characters: 
05-02 02:28:51.717: D/DEBUG(888): startElement endElement:                   menu
05-02 02:28:51.717: D/DEBUG(888): characters: 
05-02 02:28:51.717: D/DEBUG(888): startElement localName:                    menu
05-02 02:28:51.717: D/DEBUG(888): startElement attributes.getLocalName:      id
05-02 02:28:51.717: D/DEBUG(888): startElement attributes.getValue:          2
05-02 02:28:51.717: D/DEBUG(888): characters: 
05-02 02:28:51.727: D/DEBUG(888): startElement localName:                    name
05-02 02:28:51.727: D/DEBUG(888): characters:                                总排行
05-02 02:28:51.727: D/DEBUG(888): startElement endElement:                   name
05-02 02:28:51.727: D/DEBUG(888): characters: 
05-02 02:28:51.727: D/DEBUG(888): startElement localName:                    menulink
05-02 02:28:51.727: D/DEBUG(888): characters:                                http://www./partner/tv/lau/songlist_top.jsp?ps=9
05-02 02:28:51.727: D/DEBUG(888): characters:                                &
05-02 02:28:51.727: D/DEBUG(888): characters:                                si=1
05-02 02:28:51.727: D/DEBUG(888): startElement endElement:                   menulink
05-02 02:28:51.727: D/DEBUG(888): characters: 
05-02 02:28:51.727: D/DEBUG(888): startElement endElement:                   menu
05-02 02:28:51.727: D/DEBUG(888): characters: 
05-02 02:28:51.727: D/DEBUG(888): startElement localName:                    menu
05-02 02:28:51.727: D/DEBUG(888): startElement attributes.getLocalName:      id
05-02 02:28:51.727: D/DEBUG(888): startElement attributes.getValue:          3
05-02 02:28:51.738: D/DEBUG(888): characters: 
05-02 02:28:51.738: D/DEBUG(888): startElement localName:                    name
05-02 02:28:51.738: D/DEBUG(888): characters:                                最热艺人
05-02 02:28:51.738: D/DEBUG(888): startElement endElement:                   name
05-02 02:28:51.738: D/DEBUG(888): characters: 
05-02 02:28:51.738: D/DEBUG(888): startElement localName:                    menulink
05-02 02:28:51.738: D/DEBUG(888): characters:                                http://www./partner/tv/lau/starlist_top.jsp?ps=9
05-02 02:28:51.738: D/DEBUG(888): characters:                                &
05-02 02:28:51.747: D/DEBUG(888): characters:                                si=1
05-02 02:28:51.747: D/DEBUG(888): startElement endElement:                   menulink
05-02 02:28:51.747: D/DEBUG(888): characters: 
05-02 02:28:51.747: D/DEBUG(888): startElement endElement:                   menu
05-02 02:28:51.747: D/DEBUG(888): characters: 
05-02 02:28:51.747: D/DEBUG(888): startElement endElement:                   menusets
05-02 02:28:51.747: D/DEBUG(888): characters: 
05-02 02:28:51.747: D/DEBUG(888): startElement endElement:                   menulist

先上代码
要注意,必须在 AndroidManifest.xml 中加上 INTERNET 权限:
<uses-permission android:name="android.permission.INTERNET"/>

SaxParserDemoActivity.java
package com.lau.android.saxparse;

import java.io.IOException;

import java.net.MalformedURLException;

import java.net.URL;

import javax.xml.parsers.ParserConfigurationException;

import javax.xml.parsers.SAXParser;

import javax.xml.parsers.SAXParserFactory;

import org.xml.sax.InputSource;

import org.xml.sax.SAXException;

import org.xml.sax.XMLReader;

import android.app.Activity;

import android.os.Bundle;

import android.view.View;

import android.view.View.OnClickListener;

import android.widget.Button;



/**
 * Demo activity: a click on the button downloads an XML document on a worker
 * thread and parses it with SAX, delivering the callbacks to
 * {@link MyContentHandler}.
 */
public class SaxParserDemoActivity extends Activity {

    private Button button = null;

    /** Called when the activity is first created. */
    @Override
    public void onCreate(Bundle savedInstanceState) {
        super.onCreate(savedInstanceState);
        setContentView(R.layout.main);

        button = (Button) findViewById(R.id.button1);
        button.setOnClickListener(new OnClickListener() {
            @Override
            public void onClick(View v) {
                // The download and the parsing run on their own thread,
                // not on the thread that delivered the click.
                new Thread(new Runnable() {
                    @Override
                    public void run() {
                        downloadAndParse();
                    }
                }).start();
            }
        });
    }

    /**
     * Opens the URL, parses its content with a SAX {@link XMLReader} and
     * always closes the network stream afterwards. Failures are printed to
     * the error stream and otherwise ignored.
     */
    private void downloadAndParse() {
        try {
            // "****" is a placeholder; substitute the address of the XML document.
            URL url = new URL("****");

            SAXParserFactory factory = SAXParserFactory.newInstance();
            SAXParser parser = factory.newSAXParser();
            XMLReader reader = parser.getXMLReader();
            reader.setContentHandler(new MyContentHandler());

            // Open the connection last so nothing is left open if the parser
            // could not be created, and close it whether or not parsing succeeds.
            InputSource source = new InputSource(url.openStream());
            try {
                reader.parse(source);
            } finally {
                closeQuietly(source);
            }
        } catch (MalformedURLException e) {
            e.printStackTrace();
        } catch (ParserConfigurationException e) {
            e.printStackTrace();
        } catch (SAXException e) {
            e.printStackTrace();
        } catch (IOException e) {
            e.printStackTrace();
        }
    }

    /**
     * Closes the byte stream wrapped by {@code source}.
     *
     * @param source input source that was created from an open stream
     */
    private static void closeQuietly(InputSource source) {
        try {
            source.getByteStream().close();
        } catch (IOException ignored) {
            // Parsing has already finished or failed; a failing close must not
            // hide the original outcome.
        }
    }
}
MyContentHandler.java
package com.lau.android.saxparse;

import org.xml.sax.Attributes;

import org.xml.sax.SAXException;

import org.xml.sax.helpers.DefaultHandler;

import android.util.Log;



/**
 * SAX handler that writes every element start, attribute, text chunk and
 * element end to the Android log under the tag {@code "DEBUG"}.
 */
public class MyContentHandler extends DefaultHandler {

    /** Log tag used by every message of this handler. */
    private static final String TAG = "DEBUG";

    /** Start of the document; nothing is logged here. */
    @Override
    public void startDocument() throws SAXException {
        super.startDocument();
    }

    /**
     * Logs the local name of the opened element, then the local name and the
     * value of each of its attributes in index order.
     *
     * @param uri        namespace URI of the element
     * @param localName  element name without prefix
     * @param qName      qualified (prefixed) element name
     * @param attributes attributes written inside the opening tag
     */
    @Override
    public void startElement(String uri, String localName, String qName, Attributes attributes)
            throws SAXException {
        super.startElement(uri, localName, qName, attributes);

        Log.d(TAG, "startElement localName " + localName);

        final int count = attributes.getLength();
        for (int i = 0; i < count; i++) {
            Log.d(TAG, "attributes1:" + attributes.getLocalName(i));
            Log.d(TAG, "attributes2:" + attributes.getValue(i));
        }
    }

    /**
     * Logs one chunk of character data. The text of a single element may
     * arrive in several chunks (for example a URL containing an entity is
     * delivered in three calls), and whitespace between tags is reported
     * as well.
     *
     * @param ch     buffer holding the characters
     * @param start  index of the first character of the chunk
     * @param length number of characters in the chunk
     */
    @Override
    public void characters(char[] ch, int start, int length) throws SAXException {
        String chunk = new String(ch, start, length);
        Log.d(TAG, "characters: " + chunk);
        super.characters(ch, start, length);
    }

    /**
     * Logs the local name of the closed element.
     *
     * @param uri       namespace URI of the element
     * @param localName element name without prefix
     * @param qName     qualified (prefixed) element name
     */
    @Override
    public void endElement(String uri, String localName, String qName) throws SAXException {
        super.endElement(uri, localName, qName);
        Log.d(TAG, "endElement " + localName);
    }

    /** End of the document; nothing is logged here. */
    @Override
    public void endDocument() throws SAXException {
        super.endDocument();
    }
}

main.xml


<?xml version="1.0" encoding="utf-8"?>
<!-- Layout of the demo screen: a text label above a single button. -->
<!-- match_parent replaces the deprecated (and equivalent) fill_parent. -->
<LinearLayout xmlns:android="http://schemas.android.com/apk/res/android"
    android:layout_width="match_parent"
    android:layout_height="match_parent"
    android:orientation="vertical" >

    <!-- Label whose text comes from the "hello" string resource. -->
    <TextView
        android:layout_width="match_parent"
        android:layout_height="wrap_content"
        android:text="@string/hello" />

    <!-- Looked up as R.id.button1 by SaxParserDemoActivity; a click starts the parsing. -->
    <Button
        android:id="@+id/button1"
        android:layout_width="wrap_content"
        android:layout_height="wrap_content"
        android:text="Button" />

</LinearLayout>

应该注意几点

1、在解析网页时,例如上述网页中的链接 http://www./partner/tv/lau/songlist_hot.jsp?ps=9&si=1,
characters() 不会一次返回整个链接,而是分 3 次回调,把这个链接分为 3 段(链接中的 & 在 XML 中以实体 &amp; 表示,解析器会把它单独回调一次):
(1)、 http://www./partner/tv/lau/songlist_hot.jsp?ps=9
(2)、 &
(3)、 si=1
2、方法 startElement 解析的是 "<>" 括号中的内容(元素名及其属性)。
以 <parent title="卡拉OK演示歌曲" id="0"/> 为例,对应的内容为:
localName 为 "parent"
attributes.getLength() 为 2
attributes.getLocalName(0) 得到的是 "id"
attributes.getValue(0) 得到的是 "0"
attributes.getLocalName(1) 得到的是 "title"
attributes.getValue(1) 得到的是 "卡拉OK演示歌曲"
3、在 Android 4.0 中,必须把网页解析(网络访问)放到子线程里执行,否则在主线程中访问网络会抛出 NetworkOnMainThreadException。

















  • 0
    点赞
  • 1
    收藏
    觉得还不错? 一键收藏
  • 0
    评论
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值