读取字幕文件时,没有直接用字符串分割,而是尝试使用正则表达式来匹配并拆分字幕文件中的每一行,代码如下:
// Matches one whole "Dialogue:" line. Capture groups: 1 = first timestamp,
// 2 = second timestamp, 3 = everything after the six comma-separated fields
// that follow the timestamps. Group 3 is lazy, but matches() below forces it
// to extend to the end of the line.
Pattern pattern = Pattern.compile("Dialogue: \\d+,(\\d+:\\d+:\\d+\\.\\d+),(\\d+:\\d+:\\d+\\.\\d+),[^,]*,[^,]*,[^,]*,[^,]*,[^,]*,[^,]*,(.*?)");

// Load every line of the file into `lines`, decoding the bytes as UTF-16LE.
// NOTE: the UTF-16LE decoder does not strip a leading byte-order mark, so if
// the file starts with one, U+FEFF stays at the front of the first line read.
try (BufferedReader in = new BufferedReader(
        new InputStreamReader(new FileInputStream(filePath), StandardCharsets.UTF_16LE))) {
    for (String row = in.readLine(); row != null; row = in.readLine()) {
        lines.add(row);
    }
} catch (Exception ex) {
    // Best effort: report the failure and carry on with whatever was read.
    ex.printStackTrace();
}

// Only the first half of the collected lines is scanned.
// NOTE(review): presumably the second half is handled elsewhere — confirm.
int scanLimit = lines.size() / 2;
for (int idx = 0; idx < scanLimit; idx++) {
    Matcher matcher = pattern.matcher(lines.get(idx));
    System.out.println(lines.get(idx));
    if (!matcher.matches()) {
        // The whole line must match the pattern; anything else is skipped.
        continue;
    }
    System.out.println(idx);
    startTimes.add(matcher.group(1));
    endTimes.add(matcher.group(2));
    // Drop {...} override blocks from the text and trim surrounding whitespace.
    subtitles1.add(matcher.group(3).replaceAll("\\{[^}]+\\}", "").trim());
}
然后每次都读不到第一行。后来百度、问 AI,发现是 UTF-16 文件的首行前面有一个隐藏的 BOM(字节顺序标记),加了以下代码后解决:
try (BufferedReader reader = new BufferedReader(new InputStreamReader(new FileInputStream(filePath),StandardCharsets.UTF_16LE))) {
reader.mark(1);
if (reader.read() != 0xFEFF) { // 检查是否有BOM,0xFEFF是UTF-16的BOM
reader.reset(); // 如果没有BOM,重置reader到开头
}
String line;
while ((line = reader.readLine()) != null){
lines.add(line);
// System.out.println(line);
}
} catch (Exception e) {
e.printStackTrace();
}