Android中对html进行分析
1.Android 中获取html源代码
/**
 * Downloads the resource at {@code urlString} and returns its body as a string.
 *
 * <p>The bytes are always decoded as UTF-8; a charset declared by the server or by the
 * page itself is not consulted. The call blocks until the whole body has been read, so
 * on Android it should be invoked from a background thread.
 *
 * @param urlString absolute URL of the page to fetch
 * @return the page source; if anything fails, the exception's message is returned
 *         instead (it may be {@code null}), so callers cannot tell an error from
 *         content by type alone
 */
public String getHtmlString(String urlString) {
    InputStream in = null;
    try {
        URL url = new URL(urlString);
        URLConnection connection = url.openConnection();
        in = new BufferedInputStream(connection.getInputStream());

        // Plain JDK buffer instead of the legacy Apache ByteArrayBuffer/EncodingUtils
        // helpers, which are no longer part of the Android SDK since API 23.
        java.io.ByteArrayOutputStream page = new java.io.ByteArrayOutputStream(8192);
        byte[] chunk = new byte[8192];
        int read;
        // Read in chunks rather than one byte per call.
        while ((read = in.read(chunk)) != -1) {
            page.write(chunk, 0, read);
        }
        return page.toString("UTF-8");
    } catch (Exception e) {
        // Kept for backward compatibility: failures are reported through the return value.
        return e.getMessage();
    } finally {
        // Always release the connection's stream, on success and on failure alike.
        if (in != null) {
            try {
                in.close();
            } catch (java.io.IOException ignored) {
                // The result has already been decided; a failed close cannot change it.
            }
        }
    }
}
或者也可以用jsoup直接获取html,例如:
Document doc = Jsoup.connect(urlString).get();
String html = doc.html();
2.只要将jsoup的jar文件加入工程的libs文件夹中,即可在工程中使用jsoup;具体用法可参考jsoup中文版使用手册。
Android读取word文件
package com.word.read;

import android.app.Activity;
import android.os.Bundle;
import android.os.Environment;
import android.widget.TextView;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.IOException;
import org.textmining.text.extraction.WordExtractor;
- public class WordReader extends Activity {
- /** Called when the activity is first created. */
-
- private TextView text;
-
- @Override
- public void onCreate(Bundle savedInstanceState) {
- super .onCreate(savedInstanceState);
- setContentView(R.layout.main);
-
- text = (TextView) findViewById(R.id.text);
-
- String str = readWord(Environment.getExternalStorageDirectory().getAbsolutePath() + "/baojinggong.doc" );
- text.setText(str.trim().replace( "\r" , "" ));
- }
-
- public String readWord(String file){
- //创建输入流用来<a title="读取doc文件" >读取doc文件</a>
- FileInputStream in;
- String text = null ;
- try {
- in = new FileInputStream( new File(file));
- WordExtractor extractor = null ;
- //创建WordExtractor
- extractor = new WordExtractor();
- //进行提取对doc文件
- text = extractor.extractText(in);
- }
- catch (FileNotFoundException e) {
- e.printStackTrace();
- }
- catch (Exception e) {
- e.printStackTrace();
- }
- return text;
- }
- }