/** * 根据文件路径获取文件的编码格式 * @param filePath * @return */ private static String getTextFileCharset(String filePath) { String[] charsets = {"US-ASCII", "UTF-8", "GB2312", "BIG5", "GBK", "GB18030", "UTF-16BE", "UTF-16LE", "UTF-16", "UNICODE"}; String charset = Charset.defaultCharset().displayName(); CharsetDecoder decoder; BufferedReader br = null; String s = null; for (int i = 0; i < charsets.length; i++) { decoder = Charset.forName(charsets[i]).newDecoder(); try { br = new BufferedReader(new InputStreamReader(new FileInputStream(filePath), decoder)); do { s = br.readLine(); } while (s != null); charset = charsets[i]; break; } catch (FileNotFoundException e) { e.printStackTrace(); break; } catch (MalformedInputException e) { //如果编码不能解码此文本就会抛出这个异常 continue; } catch (IOException e) { e.printStackTrace(); break; } } return charset; }
/** * Metodo responsavel por fazer parse de um arquivos de legenda. <br> * Obs. O texto n�o vai conter quebra de linhas e pode ser usado Node * * @param path * @return */ public static ArrayList<Subtitle> getSubtitlesFromFile(String path, boolean twm, boolean usingNodes) { String codeFormat = getTextFileCharset(path); ArrayList<Subtitle> subtitles = null; Subtitle sub = null; StringBuilder srt = null; try { FileInputStream fileInputStream = new FileInputStream(new File(path)); BufferedReader br = new BufferedReader(new InputStreamReader(fileInputStream, Charset.forName(codeFormat))); subtitles = new ArrayList<>(); sub = new Subtitle(); srt = new StringBuilder(); while (br.ready()) { String line = br.readLine(); Matcher matcher = PATTERN_NUMBERS.matcher(line); if (matcher.find()) { sub.id = Integer.parseInt(matcher.group(1)); // index line = br.readLine(); } matcher = PATTERN_TIME.matcher(line); if (matcher.find()) { sub.startTime = matcher.group(1); // start time sub.timeIn = SRTUtils.textTimeToMillis(sub.startTime); sub.endTime = matcher.group(2); // end time sub.timeOut = SRTUtils.textTimeToMillis(sub.endTime); } String aux; while ((aux = br.readLine()) != null && !aux.isEmpty()) { srt.append(aux); if (twm) srt.append("\n"); else { if (!line.endsWith(" ")) // for new lines '\n' removed from BufferedReader srt.append(" "); } } srt.delete(srt.length() > 0 ? srt.length() - 1 : 0, srt.length()); // remove '\n' or space from end string line = srt.toString(); srt.setLength(0); if (line != null && !line.isEmpty()) line = line.replaceAll("<[^>]*>", ""); // clear all tags sub.text = line; subtitles.add(sub); if (usingNodes) { sub.nextSubtitle = new Subtitle(); sub = sub.nextSubtitle; } else { sub = new Subtitle(); } } } catch (Exception e) { e.printStackTrace(); } return subtitles; }