Android之Jsoup解析数据
jsoup 简介
Java 程序在解析 HTML 文档时,相信大家都接触过 htmlparser 这个开源项目,我曾经在 IBM DW 上发表过两篇关于 htmlparser 的文章,分别是:从 HTML 中攫取你所需的信息和 扩展 HTMLParser 对自定义标签的处理能力。但现在我已经不再使用 htmlparser 了,原因是 htmlparser 很少更新,但最重要的是有了 jsoup 。
jsoup 是一款 Java 的 HTML 解析器,可直接解析某个 URL 地址、HTML 文本内容。它提供了一套非常省力的 API,可通过 DOM,CSS 以及类似于 jQuery 的操作方法来取出和操作数据。
jsoup 的主要功能如下:
1. 从一个 URL,文件或字符串中解析 HTML;
2. 使用 DOM 或 CSS 选择器来查找、取出数据;
3. 可操作 HTML 元素、属性、文本。
jsoup 是基于 MIT 协议发布的,可放心使用于商业项目。
jsoup 的主要类层次结构如图 1 所示:
图 1. jsoup 的类层次结构
下面我们来看看Jsoup解析的一个案例:
1.主界面:MainActivity.class
import java.io.IOException;
import java.net.MalformedURLException;
import java.net.URL;
import java.util.ArrayList;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.select.Elements;
import android.os.Bundle;
import android.os.Handler;
import android.app.Activity;
import android.content.Intent;
import android.util.Log;
import android.view.Menu;
import android.view.View;
import android.widget.AdapterView;
import android.widget.AdapterView.OnItemClickListener;
import android.widget.ListView;
/**
 * Entry screen of the demo.
 *
 * <p>Downloads and parses the cnBeta topic page with jsoup on a worker thread,
 * collects one {@link Info} per news entry into {@code list}, and then shows
 * them in a ListView. Tapping a row starts {@code WebActivity} with the entry's
 * link passed in the "path" extra.
 */
public class MainActivity extends Activity {
    protected static final String TAG = "MainActivity";
    private ListView listView;
    private ArrayList<Info> list;

    // Receives the "parsing finished" signal (what == 0) from the worker thread
    // and binds the collected entries to the ListView.
    Handler handler = new Handler() {
        @Override
        public void handleMessage(android.os.Message msg) {
            if (msg.what == 0) {
                listView.setAdapter(new ListViewAdapter(MainActivity.this, list));
            }
        }
    };

    @Override
    protected void onCreate(Bundle savedInstanceState) {
        super.onCreate(savedInstanceState);
        setContentView(R.layout.activity_main);
        // Look up the list view.
        listView = (ListView) findViewById(R.id.listview);
        // Backing collection that the worker thread fills.
        list = new ArrayList<Info>();
        // Kick off the download + parse.
        parseHtml();
        // Open the tapped entry in WebActivity.
        listView.setOnItemClickListener(new OnItemClickListener() {
            @Override
            public void onItemClick(AdapterView<?> arg0, View arg1, int arg2,
                    long arg3) {
                Intent intent = new Intent(MainActivity.this, WebActivity.class);
                intent.putExtra("path", list.get(arg2).getHref());
                startActivity(intent);
            }
        });
    }

    /**
     * Fetches the topic page on a new thread, extracts title, link, summary and
     * picture URL for each entry, appends them to {@code list}, and notifies
     * {@code handler} with message 0 when done.
     *
     * <p>The three element groups are collected independently, so only as many
     * entries as the smallest group contains are read; this prevents an
     * IndexOutOfBoundsException when the page has unequal counts.
     */
    private void parseHtml() {
        new Thread() {
            @Override
            public void run() {
                try {
                    Document parse = Jsoup.parse(new URL("http://www.cnbeta.com/topics/444.htm"), 5000);
                    Elements titles = parse.getElementsByClass("title");
                    Elements summaries = parse.getElementsByClass("newsinfo");
                    Elements pictures = parse.getElementsByClass("pic");
                    // Bound the loop by the shortest group, not just by the titles.
                    int size = Math.min(titles.size(),
                            Math.min(summaries.size(), pictures.size()));
                    for (int i = 0; i < size; i++) {
                        Elements anchors = titles.get(i).getElementsByTag("a");
                        String title = anchors.text();
                        String href = anchors.attr("href");
                        String newsinfo = summaries.get(i).getElementsByTag("p").text();
                        // NOTE(review): the tag literal carries a trailing space; left
                        // untouched here - confirm jsoup normalizes it.
                        String pic = pictures.get(i).getElementsByTag("img ").attr("src");
                        list.add(new Info(title, newsinfo, pic, href));
                    }
                    for (Info info : list) {
                        Log.i(TAG, info.toString());
                    }
                    handler.sendEmptyMessage(0);
                } catch (IOException e) {
                    // Report through logcat with the stack trace attached.
                    Log.e(TAG, "Failed to load or parse the news page", e);
                }
            }
        }.start();
    }
}
<!-- activity_main.xml: screen layout for MainActivity; a single full-size list. -->
<RelativeLayout xmlns:android="http://schemas.android.com/apk/res/android"
    xmlns:tools="http://schemas.android.com/tools"
    android:layout_width="match_parent"
    android:layout_height="match_parent"
    >
    <!-- ListView has no text attribute, so the stray android:text was dropped.
         match_parent avoids the repeated measure passes wrap_content causes. -->
    <ListView
        android:id="@+id/listview"
        android:layout_width="match_parent"
        android:layout_height="match_parent" />
</RelativeLayout>
<?xml version="1.0" encoding="utf-8"?>
<!-- Row layout inflated by ListViewAdapter as R.layout.listview_item:
     title + summary stacked on the left, circular picture on the right. -->
<LinearLayout xmlns:android="http://schemas.android.com/apk/res/android"
    android:layout_width="match_parent"
    android:layout_height="match_parent"
    android:orientation="horizontal" >
    <!-- Weighted column: 0dp width lets layout_weight hand it the space left
         over after the picture has been measured. -->
    <LinearLayout
        android:layout_width="0dp"
        android:layout_height="wrap_content"
        android:layout_weight="1"
        android:orientation="vertical"
        >
        <!-- Text sizes use sp so they follow the user's font-scale setting. -->
        <TextView
            android:id="@+id/title"
            android:layout_width="wrap_content"
            android:layout_height="wrap_content"
            android:text="Large Text"
            android:padding="10dp"
            android:textSize="20sp"
            android:singleLine="true"
            android:textAppearance="?android:attr/textAppearanceLarge" />
        <!-- Empty view between title and summary. -->
        <TextView
            android:id="@+id/textView3"
            android:layout_width="wrap_content"
            android:layout_height="wrap_content"
            android:padding="5dp"
            />
        <TextView
            android:id="@+id/info"
            android:layout_width="wrap_content"
            android:layout_height="wrap_content"
            android:text="Large Text"
            android:textSize="15sp"
            android:lines="3"
            android:ellipsize="end"
            android:textAppearance="?android:attr/textAppearanceLarge" />
    </LinearLayout>
    <com.example.webview.ImageViewCircle
        android:id="@+id/img"
        android:layout_width="wrap_content"
        android:layout_height="wrap_content"
        android:padding="20dp"
        android:src="@drawable/ic_launcher" />
</LinearLayout>
4.主函数的适配器:ListViewAdapter.class
import java.util.ArrayList;
import com.lidroid.xutils.BitmapUtils;
import android.content.Context;
import android.view.View;
import android.view.ViewGroup;
import android.widget.BaseAdapter;
import android.widget.ImageView;
import android.widget.TextView;
/**
 * Adapter that renders a list of {@link Info} entries using
 * {@code R.layout.listview_item}: title and summary as text, picture loaded
 * from its URL through xUtils' BitmapUtils.
 */
public class ListViewAdapter extends BaseAdapter {
    private Context context;
    private ArrayList<Info> list;
    // One loader shared by every row; the original built a new BitmapUtils on
    // each getView call, i.e. once per row bind while scrolling.
    private BitmapUtils bitmapUtils;

    /**
     * @param context context used to inflate rows and to create the image loader
     * @param list    entries to display; the adapter keeps this reference
     */
    public ListViewAdapter(Context context, ArrayList<Info> list) {
        this.context = context;
        this.list = list;
        this.bitmapUtils = new BitmapUtils(context);
    }

    /** @return number of entries in the backing list */
    @Override
    public int getCount() {
        return list.size();
    }

    /** @return the entry at {@code position} */
    @Override
    public Object getItem(int position) {
        return list.get(position);
    }

    /** @return the position itself, used as the row id */
    @Override
    public long getItemId(int position) {
        return position;
    }

    /**
     * Builds or recycles the row for {@code position}. Child view lookups are
     * cached in a {@link ViewHolder} stored as the row's tag, so recycled rows
     * skip {@code findViewById}.
     */
    @Override
    public View getView(int position, View convertView, ViewGroup parent) {
        ViewHolder holder;
        if (convertView == null) {
            convertView = View.inflate(context, R.layout.listview_item, null);
            holder = new ViewHolder();
            holder.img = (ImageViewCircle) convertView.findViewById(R.id.img);
            holder.title = (TextView) convertView.findViewById(R.id.title);
            holder.info = (TextView) convertView.findViewById(R.id.info);
            convertView.setTag(holder);
        } else {
            holder = (ViewHolder) convertView.getTag();
        }
        Info item = list.get(position);
        holder.info.setText(item.getNewsinfo());
        holder.title.setText(item.getTitle());
        bitmapUtils.display(holder.img, item.getPic());
        return convertView;
    }

    /** Cached child views of one row; static so it holds no adapter reference. */
    static class ViewHolder {
        TextView title, info;
        ImageViewCircle img;
    }
}
5. WebActivity的布局:activity_web.xml
<!-- activity_web.xml: screen layout for WebActivity; one WebView filling the screen. -->
<RelativeLayout
    xmlns:android="http://schemas.android.com/apk/res/android"
    xmlns:tools="http://schemas.android.com/tools"
    android:layout_width="match_parent"
    android:layout_height="match_parent">

    <WebView
        android:id="@+id/webView"
        android:layout_width="match_parent"
        android:layout_height="match_parent" />

</RelativeLayout>
6.实体类 Info.class
import java.io.Serializable;
public class Info implements Serializable{
private String title;
private String newsinfo;
private String pic;
private String href;
public String getTitle() {
return title;
}
public void setTitle(String title) {
this.title = title;
}
public String getNewsinfo() {
return newsinfo;
}
public void setNewsinfo(String newsinfo) {
this.newsinfo = newsinfo;
}
public String getPic() {
return pic;
}
public void setPic(String pic) {
this.pic = pic;
}
public String getHref() {
return href;
}
public void setHref(String href) {
this.href = href;
}
public Info(String title, String newsinfo, String pic, String href) {
super();
this.title = title;
this.newsinfo = newsinfo;
this.pic = pic;
this.href = href;
}
public Info() {
super();
}
@Override
public String toString() {
return "Info [title=" + title + ", newsinfo=" + newsinfo + ", pic="
+ pic + ", href=" + href + "]";
}
}
7.ImageViewCircle工具类
import android.annotation.SuppressLint;
import android.content.Context;
import android.graphics.Bitmap;
import android.graphics.BitmapShader;
import android.graphics.Canvas;
import android.graphics.Color;
import android.graphics.Matrix;
import android.graphics.Paint;
import android.graphics.Rect;
import android.graphics.Shader.TileMode;
import android.graphics.drawable.BitmapDrawable;
import android.graphics.drawable.ColorDrawable;
import android.graphics.drawable.Drawable;
import android.util.AttributeSet;
import android.widget.ImageView;
public class ImageViewCircle extends ImageView {
private Paint mPaintBitmap = new Paint(Paint.ANTI_ALIAS_FLAG);
private Bitmap mRawBitmap;
private BitmapShader mShader;
private Matrix mMatrix = new Matrix();
public ImageViewCircle(Context context, AttributeSet attrs, int defStyle) {
super(context, attrs, defStyle);
// TODO Auto-generated constructor stub
}
public ImageViewCircle(Context context, AttributeSet attrs) {
super(context, attrs);
// TODO Auto-generated constructor stub
}
public ImageViewCircle(Context context) {
super(context);
// TODO Auto-generated constructor stub
}
@Override
protected void onDraw(Canvas canvas) {
Bitmap rawBitmap = getBitmap(getDrawable());
if (rawBitmap != null){
int viewWidth = getWidth();
int viewHeight = getHeight();
int viewMinSize = Math.min(viewWidth, viewHeight);
float dstWidth = viewMinSize;
float dstHeight = viewMinSize;
if (mShader == null || !rawBitmap.equals(mRawBitmap)){
mRawBitmap = rawBitmap;
mShader = new BitmapShader(mRawBitmap, TileMode.CLAMP, TileMode.CLAMP);
}
if (mShader != null){
mMatrix.setScale(dstWidth / rawBitmap.getWidth(), dstHeight / rawBitmap.getHeight());
mShader.setLocalMatrix(mMatrix);
}
mPaintBitmap.setShader(mShader);
float radius = viewMinSize / 2.0f;
canvas.drawCircle(radius, radius, radius, mPaintBitmap);
} else {
super.onDraw(canvas);
}
}
@SuppressLint("NewApi")
private Bitmap getBitmap(Drawable drawable){
if (drawable instanceof BitmapDrawable){
return ((BitmapDrawable)drawable).getBitmap();
} else if (drawable instanceof ColorDrawable){
Rect rect = drawable.getBounds();
int width = rect.right - rect.left;
int height = rect.bottom - rect.top;
int color = ((ColorDrawable)drawable).getColor();
Bitmap bitmap = Bitmap.createBitmap(width, height, Bitmap.Config.ARGB_8888);
Canvas canvas = new Canvas(bitmap);
canvas.drawARGB(Color.alpha(color), Color.red(color), Color.green(color), Color.blue(color));
return bitmap;
} else {
return null;
}
}
}
8.WebActivity.class
import android.os.Bundle;
import android.app.Activity;
import android.content.Intent;
import android.webkit.WebChromeClient;
import android.webkit.WebView;
/**
 * Shows a cnBeta page in a WebView. The page is selected by the "path" string
 * extra of the launching intent.
 */
public class WebActivity extends Activity {
    private static final String BASE_URL = "http://www.cnbeta.com";

    @Override
    protected void onCreate(Bundle savedInstanceState) {
        super.onCreate(savedInstanceState);
        setContentView(R.layout.activity_web);
        WebView webView = (WebView) findViewById(R.id.webView);
        Intent intent = getIntent();
        String path = intent.getStringExtra("path");
        webView.setWebChromeClient(new WebChromeClient());
        webView.loadUrl(resolveUrl(path));
    }

    /**
     * Turns the "path" extra into a loadable URL.
     *
     * <p>The original concatenated blindly, which produced
     * "http://www.cnbeta.comnull" for a missing extra and a doubled host for an
     * already-absolute link.
     *
     * @param path value of the "path" extra; may be {@code null}
     * @return the site root for a missing or empty path; the path itself when
     *         it is already an http(s) URL; otherwise the path appended to the
     *         site root with exactly one separating slash
     */
    private static String resolveUrl(String path) {
        if (path == null || path.length() == 0) {
            return BASE_URL;
        }
        if (path.startsWith("http://") || path.startsWith("https://")) {
            return path;
        }
        if (path.startsWith("//")) {
            // Protocol-relative link: only the scheme is missing.
            return "http:" + path;
        }
        return path.startsWith("/") ? BASE_URL + path : BASE_URL + "/" + path;
    }
}
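最后别忘了在 AndroidManifest.xml 中配置网络权限(<uses-permission android:name="android.permission.INTERNET" />),并注册 WebActivity。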