android webview jsoup,Android-使用JSOUP解析JS生成的网址

(请参阅下面的“更新”部分:第一个(已被采纳的)解决方案不符合Android的要求,仅供参考。)

桌面解决方案

HtmlUnit似乎无法处理此站点(最近经常发生这种情况)。所以我也没有简单的Java解决方案,但是您可以使用PhantomJS:为您的操作系统下载二进制文件,创建脚本文件,从Java代码中启动进程,并使用dom解析器(如jsoup)解析输出。

脚本文件(这里称为simple.js):

// PhantomJS script: loads a URL, lets the page's JavaScript run, and writes
// the rendered DOM to "<fileName>.html" so that another tool can parse it.
//
// Usage: phantomjs simple.js <url> [outputName]
//   first parameter:  url to load (required)
//   second parameter: output file name without extension (default "output")
//
// Exit code: 0 when the HTML file was written, 1 otherwise.
var page = require('webpage').create();
var fs = require('fs');
var system = require('system');

var url = "";
var fileName = "output";

console.log("args length: " + system.args.length);

// system.args[0] is the script name itself, so real arguments start at 1.
if (system.args.length > 1) {
    url = system.args[1];
}
if (system.args.length > 2) {
    fileName = system.args[2];
}

if (url === "") {
    // phantom.exit() does not stop the script immediately, so the page load
    // below lives in the else-branch instead of relying on the exit call.
    console.log("Usage: phantomjs simple.js <url> [outputName]");
    phantom.exit(1);
} else {
    page.settings.userAgent = 'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/37.0.2062.120 Safari/537.36';
    // Only the markup is needed; skipping images makes the load faster.
    page.settings.loadImages = false;

    page.open(url, function (status) {
        console.log("Status: " + status);
        var exitCode = 1;
        if (status === "success") {
            var path = fileName + '.html';
            try {
                fs.write(path, page.content, 'w');
                exitCode = 0;
            } catch (e) {
                // Without this catch a write error would skip phantom.exit()
                // and leave the process (and its caller) hanging.
                console.log("Could not write " + path + ": " + e);
            }
        }
        phantom.exit(exitCode);
    });
}

Java代码(获取标题和Cover-URL的示例):

try {
    // Change these to the location of your PhantomJS binary and script file.
    String outputFileName = "srulad";
    String phantomJSPath = "phantomjs" + File.separator + "bin" + File.separator + "phantomjs";
    String scriptFile = "simple.js";
    String urlParameter = "http://srulad.com/#page-2";

    // The script writes "<outputFileName>.html" into the working directory.
    File outputFile = new File(outputFileName + ".html");
    // Remove stale output so a failed run cannot be mistaken for a fresh result.
    outputFile.delete();

    // Pass every argument separately: a single concatenated command string is
    // split on whitespace and breaks for paths or URLs that contain spaces.
    // inheritIO() forwards the script's console output, so the child can never
    // block on a full stdout/stderr pipe.
    Process process = new ProcessBuilder(phantomJSPath, scriptFile, urlParameter, outputFileName)
            .inheritIO()
            .start();
    int exitCode = process.waitFor();
    if (exitCode != 0 || !outputFile.isFile()) {
        throw new IOException(
                "PhantomJS did not produce " + outputFile + " (exit code " + exitCode + ")");
    }

    Document doc = Jsoup.parse(outputFile, "UTF-8");
    Elements elements = doc.select("#list_page-2 > div");
    for (Element element : elements) {
        Element titleLink = element.select("div.l-description.float-left > div:nth-child(1) > a").first();
        Element coverImage = element.select("div.l-image.float-left > a > img.lazy").first();
        // first() returns null when nothing matched; skip entries with unexpected markup.
        if (titleLink == null || coverImage == null) {
            continue;
        }
        System.out.println(titleLink.attr("title"));
        System.out.println(coverImage.attr("data-original"));
    }
} catch (IOException e) {
    e.printStackTrace();
} catch (InterruptedException e) {
    // Restore the interrupt flag so callers further up can react to it.
    Thread.currentThread().interrupt();
    e.printStackTrace();
}

输出:

სიყვარული და მოწყალება / Love & Mercy

http://srulad.com/assets/uploads/42410_Love_and_Mercy.jpg

მუზა / The Muse

http://srulad.com/assets/uploads/43164_large_qRzsimNz0eDyFLFJcbVLIxlqii.jpg

...

更新

使用WebView和jsoup,可以在Android中解析带有基于JavaScript的动态内容的网站。以下示例应用程序使用启用了JavaScript的WebView来渲染依赖JavaScript的网站。页面加载完成后,通过JavascriptInterface把HTML源码传回Java层,并用jsoup进行解析;作为概念验证,标题和封面图像的网址被用来填充ListView。点击用于减少或增加页码的按钮会触发ListView的更新。注意:已在Android 5.1.1 / API 22设备上测试。

向您的AndroidManifest.xml添加互联网权限:

<uses-permission android:name="android.permission.INTERNET" />

activity_main.xml

<?xml version="1.0" encoding="utf-8"?>
<LinearLayout xmlns:android="http://schemas.android.com/apk/res/android"
    android:orientation="vertical"
    android:layout_width="match_parent"
    android:layout_height="match_parent">

    <LinearLayout
        android:orientation="horizontal"
        android:layout_width="match_parent"
        android:layout_height="wrap_content">

        <Button
            android:layout_width="wrap_content"
            android:layout_height="wrap_content"
            android:text="@string/page_down"
            android:id="@+id/buttonDown"
            android:layout_weight="0.5" />

        <Button
            android:layout_width="wrap_content"
            android:layout_height="wrap_content"
            android:text="@string/page_up"
            android:id="@+id/buttonUp"
            android:layout_weight="0.5" />

    </LinearLayout>

    <ListView
        android:layout_width="match_parent"
        android:layout_height="0dp"
        android:id="@+id/listView"
        android:layout_gravity="bottom"
        android:layout_weight="0.5" />

</LinearLayout>

MainActivity.java

public class MainActivity extends AppCompatActivity {

private final Handler uiHandler = new Handler();

private ArrayAdapter adapter;

private ArrayList entries = new ArrayList<>();

private ProgressDialog progressDialog;

private class JSHtmlInterface {

@android.webkit.JavascriptInterface

public void showHTML(String html) {

final String htmlContent = html;

uiHandler.post(

new Runnable() {

@Override

public void run() {

Document doc = Jsoup.parse(htmlContent);

Elements elements = doc.select("#online_movies > div > div");

entries.clear();

for (Element element : elements) {

String title = element.select("div.l-description.float-left > div:nth-child(1) > a").first().attr("title");

String imgUrl = element.select("div.l-image.float-left > a > img.lazy").first().attr("data-original");

entries.add(title + "\n" + imgUrl);

}

adapter.notifyDataSetChanged();

}

}

);

}

}

@Override

protected void onCreate(Bundle savedInstanceState) {

super.onCreate(savedInstanceState);

setContentView(R.layout.activity_main);

ListView listView = (ListView) findViewById(R.id.listView);

adapter = new ArrayAdapter<>(this, android.R.layout.simple_list_item_1, android.R.id.text1, entries);

listView.setAdapter(adapter);

progressDialog = ProgressDialog.show(this, "Loading","Please wait...", true);

progressDialog.setCancelable(false);

try {

final WebView browser = new WebView(this);

browser.setVisibility(View.INVISIBLE);

browser.setLayerType(View.LAYER_TYPE_NONE,null);

browser.getSettings().setJavaScriptEnabled(true);

browser.getSettings().setBlockNetworkImage(true);

browser.getSettings().setDomStorageEnabled(false);

browser.getSettings().setCacheMode(WebSettings.LOAD_NO_CACHE);

browser.getSettings().setLoadsImagesAutomatically(false);

browser.getSettings().setGeolocationEnabled(false);

browser.getSettings().setSupportZoom(false);

browser.addJavascriptInterface(new JSHtmlInterface(), "JSBridge");

browser.setWebViewClient(

new WebViewClient() {

@Override

public void onPageStarted(WebView view, String url, Bitmap favicon) {

progressDialog.show();

super.onPageStarted(view, url, favicon);

}

@Override

public void onPageFinished(WebView view, String url) {

browser.loadUrl("javascript:window.JSBridge.showHTML(''+document.getElementsByTagName('html')[0].innerHTML+'');");

progressDialog.dismiss();

}

}

);

findViewById(R.id.buttonDown).setOnClickListener(new View.OnClickListener() {

@Override

public void onClick(View view) {

uiHandler.post(new Runnable() {

@Override

public void run() {

int page = Integer.parseInt(browser.getUrl().split("-")[1]);

int newPage = page > 1 ? page-1 : 1;

browser.loadUrl("http://srulad.com/#page-" + newPage);

browser.loadUrl(browser.getUrl()); // not sure why this is needed, but doesn't update without it on my device

if(getSupportActionBar()!=null) getSupportActionBar().setTitle(browser.getUrl());

}

});

}

});

findViewById(R.id.buttonUp).setOnClickListener(new View.OnClickListener() {

@Override

public void onClick(View view) {

uiHandler.post(new Runnable() {

@Override

public void run() {

int page = Integer.parseInt(browser.getUrl().split("-")[1]);

int newPage = page+1;

browser.loadUrl("http://srulad.com/#page-" + newPage);

browser.loadUrl(browser.getUrl()); // not sure why this is needed, but doesn't update without it on my device

if(getSupportActionBar()!=null) getSupportActionBar().setTitle(browser.getUrl());

}

});

}

});

browser.loadUrl("http://srulad.com/#page-1");

if(getSupportActionBar()!=null) getSupportActionBar().setTitle(browser.getUrl());

} catch (Exception e) {

e.printStackTrace();

}

}

}

  • 0
    点赞
  • 0
    收藏
    觉得还不错? 一键收藏
  • 0
    评论

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值