以下代码可以直接复制试验。
解析 doc 需要 tm-extractors-0.4.jar 这个包;
解析 xls 需要 jxl.jar 这个包。
01 | public static String readDOC(String path) { |
02 | // 创建输入流读取doc文件 |
03 | FileInputStream in; |
04 | String text = null ; |
05 | // Environment.getExternalStorageDirectory().getAbsolutePath()+ "/aa.doc") |
06 | try { |
07 | in = new FileInputStream( new File(path)); |
08 | int a= in.available(); |
09 | WordExtractor extractor = null ; |
10 | // 创建WordExtractor |
11 | extractor = new WordExtractor(); |
12 | // 对doc文件进行提取 |
13 | text = extractor.extractText(in); |
14 | System.out.println( "解析得到的东西" +text); |
15 | } catch (FileNotFoundException e) { |
16 | e.printStackTrace(); |
17 | } catch (Exception e) { |
18 | e.printStackTrace(); |
19 | } |
20 | if (text == null ) { |
21 | text = "解析文件出现问题" ; |
22 | } |
23 | return text; |
24 | } |
01 | public static String readXLS(String path) { |
02 | String str = "" ; |
03 | try { |
04 | Workbook workbook = null ; |
05 | workbook = Workbook.getWorkbook( new File(path)); |
06 | Sheet sheet = workbook.getSheet( 0 ); |
07 | Cell cell = null ; |
08 | int columnCount = sheet.getColumns(); |
09 | int rowCount = sheet.getRows(); |
10 | for ( int i = 0 ; i < rowCount; i++) { |
11 | for ( int j = 0 ; j < columnCount; j++) { |
12 | cell = sheet.getCell(j, i); |
13 | String temp2 = "" ; |
14 | if (cell.getType() == CellType.NUMBER) { |
15 | temp2 = ((NumberCell) cell).getValue() + "" ; |
16 | } else if (cell.getType() == CellType.DATE) { |
17 | temp2 = "" + ((DateCell) cell).getDate(); |
18 | } else { |
19 | temp2 = "" + cell.getContents(); |
20 | } |
21 | str = str + " " + temp2; |
22 | } |
23 | str += "\n" ; |
24 | } |
25 | workbook.close(); |
26 | } catch (Exception e) { |
27 | } |
28 | if (str == null ) { |
29 | str = "解析文件出现问题" ; |
30 | } |
31 | return str; |
32 | } |
解析 docx 和 xlsx:
public static String readDOCX(String path) { |
02 | String river = "" ; |
03 | try { |
04 | ZipFile xlsxFile = new ZipFile( new File(path)); |
05 | ZipEntry sharedStringXML = xlsxFile.getEntry( "word/document.xml" ); |
06 | InputStream inputStream = xlsxFile.getInputStream(sharedStringXML); |
07 | XmlPullParser xmlParser = Xml.newPullParser(); |
08 | xmlParser.setInput(inputStream, "utf-8" ); |
09 | int evtType = xmlParser.getEventType(); |
10 | while (evtType != XmlPullParser.END_DOCUMENT) { |
11 | switch (evtType) { |
12 | case XmlPullParser.START_TAG: |
13 | String tag = xmlParser.getName(); |
14 | System.out.println(tag); |
15 | if (tag.equalsIgnoreCase( "t" )) { |
16 | river += xmlParser.nextText() + "\n" ; |
17 | } |
18 | break ; |
19 | case XmlPullParser.END_TAG: |
20 | break ; |
21 | default : |
22 | break ; |
23 | } |
24 | evtType = xmlParser.next(); |
25 | } |
26 | } catch (ZipException e) { |
27 | e.printStackTrace(); |
28 | } catch (IOException e) { |
29 | e.printStackTrace(); |
30 | } catch (XmlPullParserException e) { |
31 | e.printStackTrace(); |
32 | } |
33 | if (river == null ) { |
34 | river = "解析文件出现问题" ; |
35 | } |
36 | return river; |
37 | } |
01 | public static String readXLSX(String path) { |
02 | String str = "" ; |
03 | String v = null ; |
04 | boolean flat = false ; |
05 | List<String> ls = new ArrayList<String>(); |
06 | try { |
07 | ZipFile xlsxFile = new ZipFile( new File(path)); |
08 | ZipEntry sharedStringXML = xlsxFile |
09 | .getEntry( "xl/sharedStrings.xml" ); |
10 | InputStream inputStream = xlsxFile.getInputStream(sharedStringXML); |
11 | XmlPullParser xmlParser = Xml.newPullParser(); |
12 | xmlParser.setInput(inputStream, "utf-8" ); |
13 | int evtType = xmlParser.getEventType(); |
14 | while (evtType != XmlPullParser.END_DOCUMENT) { |
15 | switch (evtType) { |
16 | case XmlPullParser.START_TAG: |
17 | String tag = xmlParser.getName(); |
18 | if (tag.equalsIgnoreCase( "t" )) { |
19 | ls.add(xmlParser.nextText()); |
20 | } |
21 | break ; |
22 | case XmlPullParser.END_TAG: |
23 | break ; |
24 | default : |
25 | break ; |
26 | } |
27 | evtType = xmlParser.next(); |
28 | } |
29 | ZipEntry sheetXML = xlsxFile.getEntry( "xl/worksheets/sheet1.xml" ); |
30 | InputStream inputStreamsheet = xlsxFile.getInputStream(sheetXML); |
31 | XmlPullParser xmlParsersheet = Xml.newPullParser(); |
32 | xmlParsersheet.setInput(inputStreamsheet, "utf-8" ); |
33 | int evtTypesheet = xmlParsersheet.getEventType(); |
34 | while (evtTypesheet != XmlPullParser.END_DOCUMENT) { |
35 | switch (evtTypesheet) { |
36 | case XmlPullParser.START_TAG: |
37 | String tag = xmlParsersheet.getName(); |
38 | if (tag.equalsIgnoreCase( "row" )) { |
39 | } else if (tag.equalsIgnoreCase( "c" )) { |
40 | String t = xmlParsersheet.getAttributeValue( null , "t" ); |
41 | if (t != null ) { |
42 | flat = true ; |
43 | System.out.println(flat + "有" ); |
44 | } else { |
45 | System.out.println(flat + "没有" ); |
46 | flat = false ; |
47 | } |
48 | } else if (tag.equalsIgnoreCase( "v" )) { |
49 | v = xmlParsersheet.nextText(); |
50 | if (v != null ) { |
51 | if (flat) { |
52 | str += ls.get(Integer.parseInt(v)) + " " ; |
53 | } else { |
54 | str += v + " " ; |
55 | } |
56 | } |
57 | } |
58 | break ; |
59 | case XmlPullParser.END_TAG: |
60 | if (xmlParsersheet.getName().equalsIgnoreCase( "row" ) |
61 | && v != null ) { |
62 | str += "\n" ; |
63 | } |
64 | break ; |
65 | } |
66 | evtTypesheet = xmlParsersheet.next(); |
67 | } |
68 | System.out.println(str); |
69 | } catch (ZipException e) { |
70 | e.printStackTrace(); |
71 | } catch (IOException e) { |
72 | e.printStackTrace(); |
73 | } catch (XmlPullParserException e) { |
74 | e.printStackTrace(); |
75 | } |
76 | if (str == null ) { |
77 | str = "解析文件出现问题" ; |
78 | } |
79 | return str; |
80 | } |