- 需要在 pom.xml 中引入 ICU4J 的 Maven 依赖
<!-- https://mvnrepository.com/artifact/com.ibm.icu/icu4j 编码格式--> <dependency> <groupId>com.ibm.icu</groupId> <artifactId>icu4j</artifactId> <version>65.1</version> </dependency>
- 代码如下
public class ShiftJisToUTF8Test { @Test public void test() throws IOException { String filePath = "D:\\test.csv"; File file = new File(filePath); // 得到csv的编码 String encoding = getFileCharsetByICU4J(file); System.out.println("encoding:" + encoding); if(StringUtil.isNotEmpty(encoding)){ if(StringUtils.equals(encoding,"Shift_JIS")){ // csv格式转换 Shift_JIS -> UTF-8 shiftJisToUTF8(filePath); } } System.out.println("ok"); } /** * csv格式转换 Shift_JIS -> UTF-8 */ public static void shiftJisToUTF8(String filePath) throws IOException { try (InputStream in = new FileInputStream(new File(filePath))){ Reader reader = new InputStreamReader(in, "Shift_JIS"); StringBuilder sb = new StringBuilder(); int read; while ((read = reader.read()) != -1){ sb.append((char)read); } reader.close(); String string = sb.toString(); OutputStream out = new FileOutputStream( "D:\\test1.csv"); Writer writer = new OutputStreamWriter(out, "UTF-8"); writer.write(string); writer.close(); } } /** * 判断并返回csv格式 */ public static String getFileCharsetByICU4J(File file) throws IOException{ String encoding = null; Path path = Paths.get(file.getPath()); byte[] data = Files.readAllBytes(path); CharsetDetector detector = new CharsetDetector(); detector.setText(data); //这个方法推测首选的文件编码格式 CharsetMatch match = detector.detect(); //这个方法可以推测出所有可能的编码方式 CharsetMatch[] charsetMatches = detector.detectAll(); if (match == null) { return encoding; } encoding = match.getName(); return encoding; } }