简单地学习Java爬虫->使用Jsoup
一、gradle环境搭建
学习参考资料:Jsoup文档
implementation 'org.jsoup:jsoup:1.11.3'
二、Activity
package com.example.testforjsoup;
import android.support.v7.app.AppCompatActivity;
import android.os.Bundle;
import android.util.Log;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;
import java.io.IOException;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;
/**
 * Demo activity that downloads the Wikipedia main page with Jsoup on a
 * background thread and logs the headlines found in the "In the news" box.
 *
 * <p>Network access must not run on the Android main thread, so the fetch is
 * handed to a single-thread executor that is shut down as soon as the one task
 * has been submitted.
 */
public class MainActivity extends AppCompatActivity {

    /** Logcat tag used by every message emitted from this activity. */
    private static final String TAG = "zbv";

    /** Page that is downloaded and parsed. */
    private static final String URL = "https://en.wikipedia.org/wiki/Main_Page";

    /** Connect/read timeout passed to Jsoup, in milliseconds. */
    private static final int TIMEOUT_MILLIS = 10000;

    /** CSS selector for the bold links inside the element with id "mp-itn". */
    private static final String HEADLINE_SELECTOR = "#mp-itn b a";

    /**
     * Sets the content view and schedules the page fetch on a worker thread.
     *
     * @param savedInstanceState state bundle supplied by the framework; only
     *                           forwarded to the superclass
     */
    @Override
    protected void onCreate(Bundle savedInstanceState) {
        super.onCreate(savedInstanceState);
        setContentView(R.layout.activity_main);

        ExecutorService executorService = Executors.newSingleThreadExecutor();
        executorService.execute(new Runnable() {
            @Override
            public void run() {
                fetchAndLogHeadlines();
            }
        });
        // shutdown() lets the already-submitted task finish and then releases
        // the worker thread; without it every onCreate() leaks a thread.
        executorService.shutdown();
    }

    /**
     * Downloads {@link #URL}, logs the document title, then logs the title
     * attribute, visible text and absolute link target of each headline link.
     * An {@link IOException} from the download is logged, not rethrown.
     */
    private void fetchAndLogHeadlines() {
        try {
            // Fetch the page and parse it into a Jsoup document.
            Document document = Jsoup.connect(URL).timeout(TIMEOUT_MILLIS).get();
            Log.d(TAG, "document of title=" + document.title());

            Elements newsHeadlines = document.select(HEADLINE_SELECTOR);
            for (Element element : newsHeadlines) {
                String title = element.attr("title");
                String text = element.text();
                // Resolve the (possibly relative) href against the document's
                // base URI so the logged link is directly usable.
                // element.attr("abs:href") is the equivalent attribute form.
                String absUrl = element.absUrl("href");
                Log.d(TAG, "title=" + title + ";text=" + text + ";absUrl=" + absUrl);
            }
        } catch (IOException e) {
            // Log.e with the throwable already records the stack trace.
            Log.e(TAG, "IOException", e);
        }
    }
}
后续会继续更新:计划基于爬取到的数据写一个简单的 App……