首先贴上地址: http://www.17g.com/guild
如何查看一个网页的 HTML 源代码:在页面上右击鼠标,选择“查看源代码”;需要解析某个元素时,右击该元素并选择“审查元素”,就可以看到对应的 HTML 结构,方便解析。如下图:
此时在源码中用鼠标点选的节点,其在网页上对应的区域会高亮成蓝色,现在就可以根据自己的需求进行解析了。
package com.example.logintest;
import java.io.BufferedReader;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.net.HttpURLConnection;
import java.net.URL;
import java.util.ArrayList;
import java.util.List;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;
import android.app.Activity;
import android.os.AsyncTask;
import android.os.Bundle;
import android.util.Log;
import android.view.View;
import android.view.View.OnClickListener;
import android.widget.Button;
import android.widget.ListView;
/**
 * Activity that, when its button is tapped, downloads the HTML of
 * {@link #GUILD_URL}, extracts the guild entries from it with Jsoup and shows
 * them in a {@link ListView} through a {@code GuildAdapter}.
 */
public class SecondActivity extends Activity {
    private static final String TAG = "SecondActivity";
    /** Page whose HTML is downloaded and scraped. */
    private static final String GUILD_URL = "http://www.17g.com/guild";
    /** Connect and read timeout for the download, in milliseconds. */
    private static final int TIMEOUT_MS = 8000;
    /** Id of the element that contains one {@code <li>} per guild. */
    private static final String GUILD_LIST_ID = "g-ul-list";

    private Button listbutton;
    private ListView listview;

    @Override
    protected void onCreate(Bundle savedInstanceState) {
        super.onCreate(savedInstanceState);
        setContentView(R.layout.second_user);
        listbutton = (Button) findViewById(R.id.list);
        listview = (ListView) findViewById(R.id.datalist);
        listbutton.setOnClickListener(new OnClickListener() {
            @Override
            public void onClick(View arg0) {
                new NewsAsyncTask().execute(GUILD_URL);
            }
        });
    }

    /**
     * Downloads and parses the page off the UI thread, then hands the result
     * to the list view on the UI thread.
     */
    class NewsAsyncTask extends AsyncTask<String, Void, List<DefineClass>> {
        /**
         * @param params {@code params[0]} is the address of the page to fetch
         * @return the parsed entries; empty when nothing could be parsed
         */
        @Override
        protected List<DefineClass> doInBackground(String... params) {
            return getURLdata(params[0]);
        }

        /**
         * Binds the parsed entries to the list view.
         *
         * @param classlist the entries produced by {@link #doInBackground}
         */
        @Override
        protected void onPostExecute(List<DefineClass> classlist) {
            super.onPostExecute(classlist);
            // The download may outlive the screen; do not touch views of an
            // activity that is already going away.
            if (isFinishing() || classlist == null) {
                return;
            }
            GuildAdapter adapter = new GuildAdapter(SecondActivity.this, classlist);
            listview.setAdapter(adapter);
        }
    }

    /**
     * Downloads the page at {@code url} and extracts the data shown in the
     * list view. Each element of the returned list holds everything one list
     * row displays.
     *
     * @param url address of the HTML page to scrape
     * @return the parsed entries, never {@code null}; empty when the download
     *         failed or the page does not contain the expected list element
     */
    private List<DefineClass> getURLdata(String url) {
        List<DefineClass> classlist = new ArrayList<DefineClass>();
        String html = getURLhtml(url);
        if (html.length() == 0) {
            return classlist;
        }
        Document doc = Jsoup.parse(html);
        // getElementById returns null when the id is absent from the page.
        Element units = doc.getElementById(GUILD_LIST_ID);
        if (units == null) {
            Log.w(TAG, "No element with id '" + GUILD_LIST_ID + "' in " + url);
            return classlist;
        }
        for (Element item : units.getElementsByTag("li")) {
            DefineClass guild = parseGuild(item);
            if (guild != null) {
                classlist.add(guild);
            }
        }
        return classlist;
    }

    /**
     * Builds one entry from a single {@code <li>} of the guild list.
     *
     * @param item the list item element
     * @return the parsed entry, or {@code null} when the item has no name
     *         node; other fields that cannot be found are set to {@code ""}
     */
    private static DefineClass parseGuild(Element item) {
        // name: first child of the first element with class "h5"
        Element nameNode = firstChildOf(item.getElementsByClass("h5"), 0);
        if (nameNode == null) {
            return null;
        }
        DefineClass guild = new DefineClass();
        guild.name = nameNode.text();

        // introduction: first child of the first element with class "g-r"
        guild.introduction = textOf(firstChildOf(item.getElementsByClass("g-r"), 0));

        // number and No: first child of the first and second <p>
        Elements paragraphs = item.getElementsByTag("p");
        guild.number = textOf(firstChildOf(paragraphs, 0));
        guild.No = textOf(firstChildOf(paragraphs, 1));

        // pic: "src" of the grandchild of the first element with class "guild-img"
        Element image = firstChild(firstChildOf(item.getElementsByClass("guild-img"), 0));
        guild.picurl = image == null ? "" : image.attr("src");

        Log.d(TAG, "guild name=" + guild.name + " introduction=" + guild.introduction
                + " number=" + guild.number + " No=" + guild.No + " pic=" + guild.picurl);
        return guild;
    }

    /** Returns the first child of {@code elements.get(index)}, or null if either is missing. */
    private static Element firstChildOf(Elements elements, int index) {
        if (elements == null || index < 0 || index >= elements.size()) {
            return null;
        }
        return firstChild(elements.get(index));
    }

    /** Returns the first child element of {@code parent}, or null if there is none. */
    private static Element firstChild(Element parent) {
        if (parent == null || parent.children().isEmpty()) {
            return null;
        }
        return parent.child(0);
    }

    /** Returns the text of {@code element}, or {@code ""} when it is null. */
    private static String textOf(Element element) {
        return element == null ? "" : element.text();
    }

    /**
     * Fetches the complete HTML of {@code url} with a GET request, decoding
     * the body as UTF-8.
     *
     * @param url address of the page to download
     * @return the text read from the response, never {@code null}; if the
     *         request fails this is whatever was read before the failure,
     *         possibly the empty string
     */
    private String getURLhtml(String url) {
        HttpURLConnection connection = null;
        BufferedReader reader = null;
        // Created up front so a failure before or during reading cannot leave
        // it null at the return statement.
        StringBuilder response = new StringBuilder();
        try {
            URL address = new URL(url);
            connection = (HttpURLConnection) address.openConnection();
            connection.setRequestMethod("GET");
            connection.setConnectTimeout(TIMEOUT_MS);
            connection.setReadTimeout(TIMEOUT_MS);
            InputStream in = connection.getInputStream();
            reader = new BufferedReader(new InputStreamReader(in, "utf-8"));
            String line;
            while ((line = reader.readLine()) != null) {
                // readLine strips the terminator; put a separator back so text
                // on adjacent lines is not glued together.
                response.append(line).append('\n');
            }
        } catch (Exception e) {
            Log.e(TAG, "Failed to download " + url, e);
        } finally {
            if (reader != null) {
                try {
                    reader.close();
                } catch (IOException ignored) {
                    // Nothing useful can be done if closing fails.
                }
            }
            if (connection != null) {
                connection.disconnect();
            }
        }
        return response.toString();
    }
}
注:此篇文章和上一篇文章是属于同一个project,为简便起见分开描述