Android 获取网页数据的几种方式

1. HttpClient 方式
package com.example.jiaoleiqiang.getwebdata;

import android.os.Bundle;
import android.os.Handler;
import android.os.Message;
import android.support.v7.app.AppCompatActivity;
import android.view.View;
import android.view.Window;
import android.view.WindowManager;
import android.widget.Button;
import android.widget.TextView;
import android.widget.Toast;

import org.apache.http.client.ClientProtocolException;
import org.apache.http.client.ResponseHandler;
import org.apache.http.client.methods.HttpGet;
import org.apache.http.impl.client.BasicResponseHandler;
import org.apache.http.impl.client.DefaultHttpClient;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;

import java.io.IOException;
import java.util.ArrayList;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

public class MainActivity extends AppCompatActivity implements View.OnClickListener {

private Button getData;
private TextView webDataShow;

private String webData;
//    private String webSite = "http://lbsyun.baidu.com/index.php?title=android-locsdk/theupdatelog";

// private String webSite = “http://lbs.qq.com/geo/log.html“;
private String webSite = “http://lbs.amap.com/api/android-location-sdk/changelog“;

private static final int MSG_SUCCESS = 0;
private static final int MSG_FAILURE = 1;

private Handler mHandler = null;
private Thread httpClientThread;
private ArrayList<String> marks = new ArrayList<>();

@Override
protected void onCreate(Bundle savedInstanceState) {
    super.onCreate(savedInstanceState);
    requestWindowFeature(Window.FEATURE_NO_TITLE);
    getWindow().setFlags(WindowManager.LayoutParams.FLAG_FULLSCREEN,WindowManager.LayoutParams.FLAG_FULLSCREEN);
    setContentView(R.layout.activity_main);
    getData = findViewById(R.id.httpclient);
    webDataShow = findViewById(R.id.webDataShow);

    getData.setOnClickListener(this);
    getData.setOnClickListener(this);

    mHandler = new Handler() {
        @Override
        public void handleMessage(Message msg) {
            switch (msg.what) {
                case MSG_SUCCESS:
                    Toast.makeText(getApplicationContext(), "连接成功!",Toast.LENGTH_LONG).show();
                    Document doc = Jsoup.parse(msg.obj.toString());
                    webData = doc.body().text();
                    String re = "\\d{4}-\\d{1,2}-\\d{2}";
                    Pattern p = Pattern.compile(re);
                    Matcher m = p.matcher(webData);
                    while (m.find()) {
                        marks.add(m.group());
                    }
                    webData = webData.substring(webData.indexOf(marks.get(0)),webData.indexOf(marks.get(1)));
                    webDataShow.setText(webData);
                    break;
                case MSG_FAILURE:
                    Toast.makeText(getApplicationContext(), "链接失败", Toast.LENGTH_LONG).show();
            }
        }
    };
}

@Override
public void onClick(View view) {
    switch (view.getId()) {

        case R.id.httpclient:
            if (httpClientThread == null) {
                httpClientThread = new Thread(httpClientRunnable);
                httpClientThread.start();
            }
    }
}

Runnable httpClientRunnable = new Runnable() {
    @Override
    public void run() {
        httpClientWebData();
    }
};

private void httpClientWebData() {
    DefaultHttpClient httpClient = new DefaultHttpClient();
    HttpGet httpGet = new HttpGet(webSite);
    ResponseHandler<String> responseHandler = new BasicResponseHandler();

    try {
        //String content = httpClient.execute(httpGet, responseHandler);
        String content = new String(httpClient.execute(httpGet, responseHandler).getBytes(), "utf-8");
        mHandler.obtainMessage(MSG_SUCCESS, content).sendToTarget();
    } catch (ClientProtocolException e) {
        e.printStackTrace();
    } catch (Exception e) {
        e.printStackTrace();
    }
}

}
2. HttpURLConnection 方式(原作者运行时获取失败;注意 InputStream.available() 返回的并不是响应总长度,不能据此一次性读完响应)
package com.example.jiaoleiqiang.getwebdata2;

import android.os.Bundle;
import android.os.Handler;
import android.os.Message;
import android.support.v7.app.AppCompatActivity;
import android.view.View;
import android.widget.Button;
import android.widget.TextView;

import java.io.ByteArrayOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.net.HttpURLConnection;
import java.net.URL;

/**
 * Demo activity that downloads a page with {@link HttpURLConnection} on a worker thread
 * and shows the raw response text in a {@link TextView}.
 */
public class MainActivity extends AppCompatActivity implements View.OnClickListener {

    /** {@code what} code of the message that carries the downloaded text under key "value". */
    private static final int MSG_RESULT = 1;

    private Button getData;
    private TextView webData;

    Handler handler = new Handler() {
        @Override
        public void handleMessage(Message msg) {
            super.handleMessage(msg);
            if (msg.what == MSG_RESULT) {
                Bundle b = msg.getData();
                String str = b.getString("value");
                webData.setText(str);
            }
        }
    };

    @Override
    protected void onCreate(Bundle savedInstanceState) {
        super.onCreate(savedInstanceState);
        setContentView(R.layout.activity_main);

        getData = findViewById(R.id.getData);
        webData = findViewById(R.id.webData);

        getData.setOnClickListener(this);
    }

    /**
     * Starts a new download thread for every click on the button.
     *
     * @param view the clicked view
     */
    @Override
    public void onClick(View view) {
        switch (view.getId()) {
            case R.id.getData:
                new getDataThread().start();
                break;
            default:
                break;
        }
    }

    /** Worker thread: fetches the page and posts its text to {@link #handler}. */
    class getDataThread extends Thread {
        @Override
        public void run() {
            try {
                URL mUrl = new URL("http://lbs.amap.com/api/android-location-sdk/changelog");
                byte[] buf = WebUtill.getContent(mUrl, "GET", "utf-8");
                String strResult = new String(buf, "utf-8");
                Bundle data = new Bundle();
                data.putString("value", strResult);

                Message msg = handler.obtainMessage(MSG_RESULT);
                msg.setData(data);
                handler.sendMessage(msg);

                System.out.println("------>" + strResult); // print the content directly

            } catch (Exception e) {
                e.printStackTrace();
            }
        }
    }

    /**
     * Minimal HTTP helper. Each call owns its connection and stream, so concurrent worker
     * threads (one per click) cannot close each other's connection.
     */
    private static class WebUtill {

        private WebUtill() {
        }

        /**
         * Performs one HTTP request and returns the complete response body.
         *
         * @param url      address to request
         * @param method   HTTP method, e.g. "GET" or "POST"
         * @param codeType charset name; not needed to read raw bytes and currently unused,
         *                 kept so existing calls stay valid
         * @return all bytes of the response body
         * @throws Exception if the request fails or the status code is not 200
         */
        public static byte[] getContent(URL url, String method, String codeType) throws Exception {
            HttpURLConnection connection = (HttpURLConnection) url.openConnection();
            try {
                connection.setConnectTimeout(6000); // connect timeout
                connection.setReadTimeout(6000);    // do not block forever on a stalled server
                connection.setRequestMethod(method);

                int status = connection.getResponseCode();
                if (status != 200) {
                    // Include the code: e.g. a redirect that is not followed shows up here.
                    throw new IOException("Fail to request url, HTTP " + status);
                }

                InputStream input = connection.getInputStream();
                try {
                    return readDate(input, codeType);
                } finally {
                    input.close();
                }
            } finally {
                connection.disconnect();
            }
        }

        /**
         * Reads the stream until end-of-stream.
         *
         * <p>{@link InputStream#available()} only estimates how many bytes can be read
         * without blocking (often 0 for a fresh network stream), so it must not be used
         * as the body length; a single {@code read} may also return fewer bytes than
         * requested. Looping until {@code -1} is the only reliable way to get the whole body.
         *
         * @param input stream to drain; not closed by this method
         * @param mode  unused, kept for call compatibility
         * @return every byte read from {@code input}
         * @throws IOException if reading fails
         */
        private static byte[] readDate(InputStream input, String mode) throws IOException {
            ByteArrayOutputStream collected = new ByteArrayOutputStream();
            byte[] chunk = new byte[4096];
            int count;
            while ((count = input.read(chunk)) != -1) {
                collected.write(chunk, 0, count);
            }
            System.out.println("input 的长度:" + collected.size());
            return collected.toByteArray();
        }

    }

}

3. Jsoup 方式
package com.example.jiaoleiqiang.getwebcontent;

import android.os.Handler;
import android.os.Message;
import android.support.v7.app.AppCompatActivity;
import android.os.Bundle;
import android.view.View;
import android.widget.Button;
import android.widget.TextView;

import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;

import java.io.IOException;
import java.text.BreakIterator;
import java.util.ArrayList;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

/**
 * Demo activity that fetches three SDK changelog pages with jsoup on worker threads and
 * shows an excerpt of each page in its own {@link TextView}.
 *
 * <p>Each worker posts the page's visible text to {@link #mHandler}; the message's
 * {@code what} ({@code GAO_DE}, {@code TENCENT} or {@code BAI_DU}) selects the excerpt
 * rule and the target view.
 */
public class MainActivity extends AppCompatActivity implements View.OnClickListener {

    private static final int GAO_DE = 0;
    private static final int TENCENT = 1;
    private static final int BAI_DU = 2;

    /** Date stamps on the Gaode page (month may have one digit); compiled once. */
    private static final Pattern GAODE_DATE = Pattern.compile("\\d{4}-\\d{1,2}-\\d{2}");
    /** Date stamps on the Tencent page (always yyyy-MM-dd, 10 characters); compiled once. */
    private static final Pattern TENCENT_DATE = Pattern.compile("\\d{4}-\\d{2}-\\d{2}");

    private Button updateList;
    private TextView gaodeUpdate;
    private TextView tencentUpdate;
    private TextView baiduUpdate;

    private String gaodeData;
    private String tencentData;
    private String baiduData;

    private Handler mHandler = null;

    @Override
    protected void onCreate(Bundle savedInstanceState) {
        super.onCreate(savedInstanceState);
        setContentView(R.layout.activity_main);

        updateList = findViewById(R.id.updateList);
        gaodeUpdate = findViewById(R.id.gaodeUpdate);
        tencentUpdate = findViewById(R.id.tencentUpdate);
        baiduUpdate = findViewById(R.id.baiduUpdate);

        updateList.setOnClickListener(this);

        mHandler = new Handler() {
            @Override
            public void handleMessage(Message msg) {
                switch (msg.what) {
                    case GAO_DE:
                        gaodeData = extractGaode(pageText(msg.obj));
                        gaodeUpdate.setText(gaodeData);
                        break;
                    case TENCENT:
                        tencentData = extractTencent(pageText(msg.obj));
                        tencentUpdate.setText(tencentData);
                        break;
                    case BAI_DU:
                        baiduData = pageText(msg.obj);
                        baiduUpdate.setText(extractBaidu(baiduData));
                        break;
                    default:
                        break;
                }
            }
        };
    }

    /**
     * Starts one download thread per changelog page.
     *
     * @param view the clicked view
     */
    @Override
    public void onClick(View view) {
        switch (view.getId()) {
            case R.id.updateList:
                new Thread(gaodeRunnable).start();
                new Thread(tencentRunnable).start();
                new Thread(baiduRunnable).start();
                break;
            default:
                break;
        }
    }

    Runnable gaodeRunnable = new Runnable() {
        @Override
        public void run() {
            fetchAndPost("http://lbs.amap.com/api/android-location-sdk/changelog", GAO_DE);
        }
    };

    Runnable tencentRunnable = new Runnable() {
        @Override
        public void run() {
            fetchAndPost("http://lbs.qq.com/geo/log.html", TENCENT);
        }
    };

    Runnable baiduRunnable = new Runnable() {
        @Override
        public void run() {
            fetchAndPost("http://lbsyun.baidu.com/index.php?title=android-locsdk/theupdatelog", BAI_DU);
        }
    };

    /**
     * Downloads {@code url}, posts the page's visible text to {@link #mHandler} and echoes
     * it to stdout. Must run off the main thread.
     *
     * <p>The echo happens inside the {@code try}: doing it after the {@code catch} would
     * dereference a null document and crash the worker whenever the download fails.
     *
     * @param url  page to download
     * @param what message code identifying the page
     */
    private void fetchAndPost(String url, int what) {
        try {
            Document doc = Jsoup.connect(url).get();
            String content = doc.body().text();
            mHandler.obtainMessage(what, content).sendToTarget();
            System.out.println(content);
        } catch (IOException e) {
            e.printStackTrace();
        }
    }

    /** Re-parses a message payload and returns its body text, as the original handler did. */
    private static String pageText(Object payload) {
        Document doc = Jsoup.parse(payload.toString());
        return doc.body().text();
    }

    /**
     * Returns the text from the first date stamp up to (not including) the second one.
     * With a single date the text runs to the end; with none the whole text is returned
     * instead of throwing.
     */
    private static String extractGaode(String text) {
        Matcher matcher = GAODE_DATE.matcher(text);
        if (!matcher.find()) {
            return text;
        }
        int start = matcher.start();
        int end = matcher.find() ? matcher.start() : text.length();
        return text.substring(start, end);
    }

    /**
     * Returns the text that starts at the second occurrence of the first date and ends
     * before the next occurrence of the second date found on the page.
     *
     * <p>NOTE(review): presumably the page lists the dates once up front and repeats them
     * before each entry — confirm against the live page. When that shape is not found the
     * method degrades to the closest available span instead of throwing.
     */
    private static String extractTencent(String text) {
        Matcher matcher = TENCENT_DATE.matcher(text);
        if (!matcher.find()) {
            return text;
        }
        String first = matcher.group();
        int firstStart = matcher.start();
        if (!matcher.find()) {
            return text.substring(firstStart);
        }
        String second = matcher.group();

        // Skip past the first occurrence, then look for the same date again.
        String afterFirst = text.substring(firstStart + first.length());
        int repeat = afterFirst.indexOf(first);
        String entry = repeat >= 0 ? afterFirst.substring(repeat) : text.substring(firstStart);

        int end = entry.indexOf(second);
        return end >= 0 ? entry.substring(0, end) : entry;
    }

    /**
     * Collects up to three trailing "xx:"-labelled sections of the page text, working
     * backwards from the last ';'. A section is prepended only if its three-character
     * label (two characters plus ':') is not already part of the result.
     *
     * <p>Every index is checked before slicing; when no usable ':' is left the sections
     * gathered so far are returned instead of throwing.
     */
    private static String extractBaidu(String text) {
        String rest = text.substring(0, text.lastIndexOf(";") + 1);

        int colon = rest.lastIndexOf(":");
        if (colon < 2) {
            return rest;
        }
        String result = rest.substring(colon - 2);
        rest = rest.substring(0, colon - 2);

        for (int i = 0; i < 2; i++) {
            colon = rest.lastIndexOf(":");
            if (colon < 2) {
                break;
            }
            String label = rest.substring(colon - 2, colon + 1);
            if (!result.contains(label)) {
                result = rest.substring(colon - 2) + result;
                rest = rest.substring(0, colon - 2);
            }
        }
        return result;
    }

}

4.获取网页内容并解析出正文部分
import java.io.IOException;
import java.util.Stack;

import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;

/**
 * Command-line demo: downloads a page with jsoup and looks for its main text, taken to be
 * the {@code div} whose own paragraphs and table cells hold the most text.
 */
public class GetContent {
    static int index;

    /**
     * Downloads the page, runs the main-text extraction and prints the page's full body text.
     *
     * @param args unused
     */
    public static void main(String[] args) {
        String url = "http://lbs.amap.com/api/android-location-sdk/changelog";
        Document doc;
        try {
            doc = Jsoup.connect(url).get();
        } catch (IOException e) {
            // Nothing to parse without a document; continuing would dereference null.
            e.printStackTrace();
            return;
        }
        String content = GetDocContent(doc);
        System.out.println(doc.body().text());
        // To print only the extracted main text instead:
        // System.out.println("网页正文如下:\n" + content);
    }

    /**
     * Returns the text of the {@code div} with the longest extracted content.
     *
     * @param doc parsed page
     * @return the longest per-div text, or an empty string when the page has no {@code div}
     */
    private static String GetDocContent(Document doc) {
        Elements divs = doc.body().getElementsByTag("div");
        String content = "";
        int max = -1;
        for (Element div : divs) {
            String divContent = GetDivContent(div);
            if (divContent.length() > max) {
                max = divContent.length();
                content = divContent;
            }
        }
        return content;
    }

    /**
     * Collects the text of a div's {@code p} and {@code td} descendants in document order,
     * one per line, without descending into nested divs.
     *
     * @param div root of the subtree to scan
     * @return the collected lines, each terminated by '\n'
     */
    private static String GetDivContent(Element div) {
        StringBuilder sb = new StringBuilder();

        // Depth-first traversal of the div's subtree so text keeps its document order.
        Stack<Element> sk = new Stack<Element>();
        sk.push(div);
        while (!sk.empty()) {
            Element e = sk.pop();

            // Nested divs are skipped; they are evaluated on their own by the caller.
            if (e != div && e.tagName().equals("div")) {
                continue;
            }

            // Text held in a <p> that contains no <a>.
            if (e.tagName().equals("p") && e.getElementsByTag("a").isEmpty()) {
                if ("pictext".equals(e.className())) {
                    continue;
                }
                sb.append(e.text());
                sb.append("\n");
                continue;
            } else if (e.tagName().equals("td")) {
                // Text held in a <td>, unless the cell wraps a div.
                if (!e.getElementsByTag("div").isEmpty()) {
                    continue;
                }
                sb.append(e.text());
                sb.append("\n");
                continue;
            }

            // Push children in reverse so the first child is popped first.
            Elements children = e.children();
            for (int i = children.size() - 1; i >= 0; i--) {
                sk.push(children.get(i));
            }
        }
        return sb.toString();
    }

}

  • 2
    点赞
  • 18
    收藏
    觉得还不错? 一键收藏
  • 0
    评论

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值