1.HttpClient方式
package com.example.jiaoleiqiang.getwebdata;
import android.os.Bundle;
import android.os.Handler;
import android.os.Message;
import android.support.v7.app.AppCompatActivity;
import android.view.View;
import android.view.Window;
import android.view.WindowManager;
import android.widget.Button;
import android.widget.TextView;
import android.widget.Toast;
import org.apache.http.client.ClientProtocolException;
import org.apache.http.client.ResponseHandler;
import org.apache.http.client.methods.HttpGet;
import org.apache.http.impl.client.BasicResponseHandler;
import org.apache.http.impl.client.DefaultHttpClient;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import java.io.IOException;
import java.util.ArrayList;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
public class MainActivity extends AppCompatActivity implements View.OnClickListener {
private Button getData;
private TextView webDataShow;
private String webData;
// private String webSite = "http://lbsyun.baidu.com/index.php?title=android-locsdk/theupdatelog";
// private String webSite = “http://lbs.qq.com/geo/log.html“;
private String webSite = “http://lbs.amap.com/api/android-location-sdk/changelog“;
private static final int MSG_SUCCESS = 0;
private static final int MSG_FAILURE = 1;
private Handler mHandler = null;
private Thread httpClientThread;
private ArrayList<String> marks = new ArrayList<>();
@Override
protected void onCreate(Bundle savedInstanceState) {
super.onCreate(savedInstanceState);
requestWindowFeature(Window.FEATURE_NO_TITLE);
getWindow().setFlags(WindowManager.LayoutParams.FLAG_FULLSCREEN,WindowManager.LayoutParams.FLAG_FULLSCREEN);
setContentView(R.layout.activity_main);
getData = findViewById(R.id.httpclient);
webDataShow = findViewById(R.id.webDataShow);
getData.setOnClickListener(this);
getData.setOnClickListener(this);
mHandler = new Handler() {
@Override
public void handleMessage(Message msg) {
switch (msg.what) {
case MSG_SUCCESS:
Toast.makeText(getApplicationContext(), "连接成功!",Toast.LENGTH_LONG).show();
Document doc = Jsoup.parse(msg.obj.toString());
webData = doc.body().text();
String re = "\\d{4}-\\d{1,2}-\\d{2}";
Pattern p = Pattern.compile(re);
Matcher m = p.matcher(webData);
while (m.find()) {
marks.add(m.group());
}
webData = webData.substring(webData.indexOf(marks.get(0)),webData.indexOf(marks.get(1)));
webDataShow.setText(webData);
break;
case MSG_FAILURE:
Toast.makeText(getApplicationContext(), "链接失败", Toast.LENGTH_LONG).show();
}
}
};
}
@Override
public void onClick(View view) {
switch (view.getId()) {
case R.id.httpclient:
if (httpClientThread == null) {
httpClientThread = new Thread(httpClientRunnable);
httpClientThread.start();
}
}
}
Runnable httpClientRunnable = new Runnable() {
@Override
public void run() {
httpClientWebData();
}
};
private void httpClientWebData() {
DefaultHttpClient httpClient = new DefaultHttpClient();
HttpGet httpGet = new HttpGet(webSite);
ResponseHandler<String> responseHandler = new BasicResponseHandler();
try {
//String content = httpClient.execute(httpGet, responseHandler);
String content = new String(httpClient.execute(httpGet, responseHandler).getBytes(), "utf-8");
mHandler.obtainMessage(MSG_SUCCESS, content).sendToTarget();
} catch (ClientProtocolException e) {
e.printStackTrace();
} catch (Exception e) {
e.printStackTrace();
}
}
}
2.HttpURLConnection方式(获取失败,原因不明)
package com.example.jiaoleiqiang.getwebdata2;
import android.os.Bundle;
import android.os.Handler;
import android.os.Message;
import android.support.v7.app.AppCompatActivity;
import android.view.View;
import android.widget.Button;
import android.widget.TextView;
import java.io.ByteArrayOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.net.HttpURLConnection;
import java.net.URL;
/**
 * Demo activity that downloads a web page with {@link HttpURLConnection} on a worker thread
 * and shows the raw response text in a TextView.
 */
public class MainActivity extends AppCompatActivity implements View.OnClickListener {
    /** Message code used by the worker to deliver the downloaded text. */
    private static final int MSG_CONTENT = 1;

    private Button getData;
    private TextView webData;

    /** Receives the downloaded text (Bundle key "value") and displays it. */
    Handler handler = new Handler() {
        @Override
        public void handleMessage(Message msg) {
            super.handleMessage(msg);
            if (msg.what == MSG_CONTENT) {
                Bundle b = msg.getData();
                String str = b.getString("value");
                webData.setText(str);
            }
        }
    };

    @Override
    protected void onCreate(Bundle savedInstanceState) {
        super.onCreate(savedInstanceState);
        setContentView(R.layout.activity_main);
        getData = findViewById(R.id.getData);
        webData = findViewById(R.id.webData);
        getData.setOnClickListener(this);
    }

    /**
     * Starts a new download thread each time the button is clicked.
     *
     * @param view the clicked view
     */
    @Override
    public void onClick(View view) {
        if (view.getId() == R.id.getData) {
            new getDataThread().start();
        }
    }

    /** Worker thread: downloads the page and hands the decoded text to {@code handler}. */
    class getDataThread extends Thread {
        @Override
        public void run() {
            try {
                URL mUrl = new URL("http://lbs.amap.com/api/android-location-sdk/changelog");
                byte[] buf = WebUtill.getContent(mUrl, "GET", "utf-8");
                String strResult = new String(buf, "utf-8");
                Bundle data = new Bundle();
                data.putString("value", strResult);
                Message msg = handler.obtainMessage(MSG_CONTENT);
                msg.setData(data);
                handler.sendMessage(msg);
                System.out.println("------>" + strResult); // dump the content for debugging
            } catch (Exception e) {
                // NOTE(review): a non-200 status (for example a redirect that is not followed)
                // also ends up here; the status code is included in the exception message.
                e.printStackTrace();
            }
            // The connection is released inside WebUtill.getContent, so nothing to clean up here.
        }
    }

    /** Small HTTP helper. Holds no shared state, so concurrent downloads cannot interfere. */
    private static class WebUtill {
        private static final int TIMEOUT_MS = 6000;
        private static final int BUFFER_SIZE = 8192;

        /**
         * Requests {@code url} and returns the complete response body.
         *
         * @param url address to request
         * @param method HTTP method, for example "GET" or "POST"
         * @param codeType intended text encoding; currently unused because raw bytes are returned
         * @return all bytes of the response body
         * @throws Exception if the request fails or the status code is not 200
         */
        public static byte[] getContent(URL url, String method, String codeType) throws Exception {
            HttpURLConnection connection = (HttpURLConnection) url.openConnection();
            try {
                connection.setConnectTimeout(TIMEOUT_MS); // connect timeout
                connection.setReadTimeout(TIMEOUT_MS);    // do not hang forever on a stalled server
                connection.setRequestMethod(method);
                int status = connection.getResponseCode();
                if (status != HttpURLConnection.HTTP_OK) {
                    throw new RuntimeException("Fail to request url, response code: " + status);
                }
                try (InputStream input = connection.getInputStream()) {
                    return readFully(input);
                }
            } finally {
                connection.disconnect();
            }
        }

        /**
         * Reads {@code input} until end of stream.
         *
         * <p>The previous version allocated {@code input.available()} bytes and called
         * {@code read} once. {@code available()} only estimates what can be read without
         * blocking and is not the body length, so network responses came back empty or
         * truncated.
         *
         * @param input stream to drain; not closed by this method
         * @return every byte read from the stream
         * @throws IOException if reading fails
         */
        private static byte[] readFully(InputStream input) throws IOException {
            ByteArrayOutputStream out = new ByteArrayOutputStream();
            byte[] chunk = new byte[BUFFER_SIZE];
            int read;
            while ((read = input.read(chunk)) != -1) {
                out.write(chunk, 0, read);
            }
            System.out.println("input 的长度:" + out.size());
            return out.toByteArray();
        }
    }
}
3.jsoup方式
package com.example.jiaoleiqiang.getwebcontent;
import android.os.Handler;
import android.os.Message;
import android.support.v7.app.AppCompatActivity;
import android.os.Bundle;
import android.view.View;
import android.widget.Button;
import android.widget.TextView;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import java.io.IOException;
import java.text.BreakIterator;
import java.util.ArrayList;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
/**
 * Demo activity that downloads three SDK changelog pages with jsoup (one worker thread per
 * page) and shows the newest entry of each page in its own TextView.
 *
 * <p>Workers send the page's visible text to {@code mHandler}; {@code msg.what} identifies the
 * source page and {@code msg.obj} carries the text.
 */
public class MainActivity extends AppCompatActivity implements View.OnClickListener {
    private static final int GAO_DE = 0;
    private static final int TENCENT = 1;
    private static final int BAI_DU = 2;

    /** Date stamps on the AMap page (month may be one or two digits). */
    private static final Pattern GAODE_DATE = Pattern.compile("\\d{4}-\\d{1,2}-\\d{2}");
    /** Date stamps on the Tencent page. */
    private static final Pattern TENCENT_DATE = Pattern.compile("\\d{4}-\\d{2}-\\d{2}");

    private Button updateList;
    private TextView gaodeUpdate;
    private TextView tencentUpdate;
    private TextView baiduUpdate;
    private String gaodeData;
    private String tencentData;
    private String baiduData;
    private Handler mHandler = null;

    @Override
    protected void onCreate(Bundle savedInstanceState) {
        super.onCreate(savedInstanceState);
        setContentView(R.layout.activity_main);
        updateList = findViewById(R.id.updateList);
        gaodeUpdate = findViewById(R.id.gaodeUpdate);
        tencentUpdate = findViewById(R.id.tencentUpdate);
        baiduUpdate = findViewById(R.id.baiduUpdate);
        updateList.setOnClickListener(this);
        mHandler = new Handler() {
            @Override
            public void handleMessage(Message msg) {
                switch (msg.what) {
                    case GAO_DE:
                        gaodeData = extractGaodeEntry(plainText(msg));
                        gaodeUpdate.setText(gaodeData);
                        break;
                    case TENCENT:
                        tencentData = extractTencentEntry(plainText(msg));
                        tencentUpdate.setText(tencentData);
                        break;
                    case BAI_DU:
                        baiduData = extractBaiduEntry(plainText(msg));
                        baiduUpdate.setText(baiduData);
                        break;
                    default:
                        break;
                }
            }
        };
    }

    /**
     * Starts one download thread per changelog page.
     *
     * @param view the clicked view
     */
    @Override
    public void onClick(View view) {
        if (view.getId() == R.id.updateList) {
            new Thread(gaodeRunnable).start();
            new Thread(tencentRunnable).start();
            new Thread(baiduRunnable).start();
        }
    }

    /** Downloads the AMap changelog. */
    Runnable gaodeRunnable = new Runnable() {
        @Override
        public void run() {
            fetchAndPost("http://lbs.amap.com/api/android-location-sdk/changelog", GAO_DE);
        }
    };

    /** Downloads the Tencent changelog. */
    Runnable tencentRunnable = new Runnable() {
        @Override
        public void run() {
            fetchAndPost("http://lbs.qq.com/geo/log.html", TENCENT);
        }
    };

    /** Downloads the Baidu changelog. */
    Runnable baiduRunnable = new Runnable() {
        @Override
        public void run() {
            fetchAndPost("http://lbsyun.baidu.com/index.php?title=android-locsdk/theupdatelog", BAI_DU);
        }
    };

    /**
     * Downloads {@code url}, posts its visible text to {@code mHandler} under {@code what}
     * and echoes it to stdout.
     *
     * <p>The echo used to run after the try/catch and dereferenced a null document whenever
     * the download failed, which threw on the worker thread. It now runs only on success.
     *
     * @param url page to download
     * @param what message code identifying the page
     */
    private void fetchAndPost(String url, int what) {
        try {
            Document doc = Jsoup.connect(url).get();
            String content = doc.body().text();
            mHandler.obtainMessage(what, content).sendToTarget();
            System.out.println(content);
        } catch (IOException e) {
            e.printStackTrace();
        }
    }

    /** Re-parses the message payload with jsoup and returns its visible text, as before. */
    private static String plainText(Message msg) {
        Document doc = Jsoup.parse(msg.obj.toString());
        return doc.body().text();
    }

    /**
     * Returns the text from the first date stamp up to (not including) the second one.
     * Falls back to "first date to end" with a single date and to the whole text with none,
     * instead of throwing.
     */
    private static String extractGaodeEntry(String text) {
        Matcher dates = GAODE_DATE.matcher(text);
        if (!dates.find()) {
            return text;
        }
        int start = dates.start();
        int end = dates.find() ? dates.start() : text.length();
        return text.substring(start, end);
    }

    /**
     * Returns the entry that starts at the second occurrence of the first date found on the
     * page and ends before the next occurrence of the second date found.
     *
     * <p>NOTE(review): this appears to assume the page lists its dates once (for example in
     * an index) before repeating them at the entries themselves; confirm against the live
     * page. When an expected marker is missing, the method degrades to the closest available
     * bounds instead of throwing.
     */
    private static String extractTencentEntry(String text) {
        Matcher dates = TENCENT_DATE.matcher(text);
        if (!dates.find()) {
            return text;
        }
        String newest = dates.group();
        String previous = dates.find() ? dates.group() : null;

        int firstHit = text.indexOf(newest);
        // Skip the first occurrence, then look for the date again.
        String afterFirstHit = text.substring(firstHit + newest.length());
        int repeat = afterFirstHit.indexOf(newest);
        String entry = repeat >= 0 ? afterFirstHit.substring(repeat) : text.substring(firstHit);
        if (previous == null) {
            return entry;
        }
        int end = entry.indexOf(previous);
        return end >= 0 ? entry.substring(0, end) : entry;
    }

    /**
     * Collects up to three trailing sections of the page text, working backwards from the
     * last ";". Each section starts two characters before a ":" (presumably a two-character
     * label; verify against the live page). Collection stops once a label repeats one that
     * was already collected.
     *
     * <p>Returns the text unchanged when the expected ";" / ":" markers are missing, where
     * the unguarded substring arithmetic used to throw.
     */
    private static String extractBaiduEntry(String text) {
        String remaining = text.substring(0, text.lastIndexOf(";") + 1);
        int cut = remaining.lastIndexOf(":") - 2;
        if (cut < 0) {
            return text;
        }
        String collected = remaining.substring(cut);
        remaining = remaining.substring(0, cut);
        for (int i = 0; i < 2; i++) {
            int labelStart = remaining.lastIndexOf(":") - 2;
            if (labelStart < 0) {
                break;
            }
            String label = remaining.substring(labelStart, labelStart + 3);
            if (collected.contains(label)) {
                // Nothing changes in this case, so further iterations would repeat the same check.
                break;
            }
            collected = remaining.substring(labelStart) + collected;
            remaining = remaining.substring(0, labelStart);
        }
        return collected;
    }
}
4.获取网页内容并解析出正文部分
import java.io.IOException;
import java.util.Stack;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;
/**
 * Console demo: downloads a web page with jsoup and extracts its main text by picking the
 * div whose directly contained paragraphs and table cells hold the most text.
 */
public class GetContent {
    static int index;

    /**
     * Downloads the changelog page and prints its full visible text.
     *
     * @param args unused
     */
    public static void main(String[] args) {
        String url = "http://lbs.amap.com/api/android-location-sdk/changelog";
        Document doc;
        try {
            doc = Jsoup.connect(url).get();
        } catch (IOException e) {
            e.printStackTrace();
            // Without a document there is nothing to print; continuing used to throw a
            // NullPointerException.
            return;
        }
        String content = GetDocContent(doc);
        System.out.println(doc.body().text());
        // Enable the line below to print only the extracted main content instead:
        // System.out.println("网页正文如下:\n" + content);
    }

    /**
     * Returns the extracted text of the div that yields the most text.
     *
     * @param doc parsed page
     * @return the longest per-div text (the first one wins on ties); empty if there are no divs
     */
    private static String GetDocContent(Document doc) {
        String longest = "";
        for (Element div : doc.body().getElementsByTag("div")) {
            String divContent = GetDivContent(div);
            if (divContent.length() > longest.length()) {
                longest = divContent;
            }
        }
        return longest;
    }

    /**
     * Collects the text of {@code div} in document order, one line per paragraph or cell.
     *
     * <p>Walks the subtree depth-first; nested divs are skipped entirely (each is scored on
     * its own by the caller).
     *
     * @param div root of the subtree to scan
     * @return newline-terminated text of the accepted {@code p} and {@code td} elements
     */
    private static String GetDivContent(Element div) {
        StringBuilder sb = new StringBuilder();
        Stack<Element> pending = new Stack<Element>();
        pending.push(div);
        while (!pending.empty()) {
            Element e = pending.pop();
            // Skip divs nested inside the one being scanned.
            if (e != div && e.tagName().equals("div")) {
                continue;
            }
            if (e.tagName().equals("p") && e.getElementsByTag("a").size() == 0) {
                // Body text in a paragraph that contains no links; "pictext" paragraphs are skipped.
                if (!e.className().equals("pictext")) {
                    sb.append(e.text());
                    sb.append("\n");
                }
                continue;
            } else if (e.tagName().equals("td")) {
                // Body text in a table cell, unless the cell wraps a div.
                if (e.getElementsByTag("div").size() == 0) {
                    sb.append(e.text());
                    sb.append("\n");
                }
                continue;
            }
            // Push children in reverse so they are popped in document order.
            Elements children = e.children();
            for (int i = children.size() - 1; i >= 0; i--) {
                pending.push(children.get(i));
            }
        }
        return sb.toString();
    }
}