文件格式转换
import codecs
import os
def convert(filename):
    """Re-encode the text file ``filename`` in place from GB2312 to UTF-8.

    The whole file is decoded into memory first and the reader is closed
    before the same path is reopened for writing, so the file is only
    truncated after its content has been read successfully.

    Args:
        filename: Path of the file to rewrite.

    Raises:
        UnicodeDecodeError: If the file content is not valid GB2312
            (``codecs.open`` decodes with strict error handling by default).
            The file has not been modified when this is raised.
        IOError/OSError: If the file cannot be opened for reading or writing.
    """
    with codecs.open(filename, 'r', encoding='gb2312') as input_file:
        content = input_file.read()
    # ``content`` is already decoded text; the UTF-8 writer below performs the
    # only encoding step that is needed, so no manual encode/decode round trip.
    with codecs.open(filename, 'w', encoding='utf-8') as output_file:
        output_file.write(content)
def visit_folder(folder_path):
    """Recursively apply ``convert`` to every regular file below ``folder_path``.

    Entries are visited in the order ``os.listdir`` returns them. Regular
    files are passed to ``convert``; sub-directories are descended into;
    anything that is neither (for example a broken symlink) is ignored.

    Args:
        folder_path: Directory whose tree should be processed.

    Raises:
        OSError: If ``folder_path`` (or a nested directory) cannot be listed.
        Any exception raised by ``convert`` propagates and stops the walk.
    """
    for file_name in os.listdir(folder_path):
        file_path = os.path.join(folder_path, file_name)
        if os.path.isfile(file_path):
            convert(file_path)
        elif os.path.isdir(file_path):
            visit_folder(file_path)
# Script entry point: runs at module load and walks this hard-coded Windows
# directory, handing every regular file found beneath it to convert().
visit_folder(r'C:\Desktop\test')
import com.ibm.icu.text.CharsetDetector;
import com.ibm.icu.text.CharsetMatch;
import java.io.*;
import java.nio.ByteBuffer;
import java.nio.CharBuffer;
import java.nio.charset.CharacterCodingException;
import java.nio.charset.Charset;
import java.nio.charset.CharsetDecoder;
import java.nio.charset.CharsetEncoder;
import java.nio.charset.StandardCharsets;
import java.nio.file.Files;
import java.nio.file.Path;
import java.nio.file.Paths;
import java.nio.file.StandardCopyOption;
/**
 * Static helpers that detect the encoding of files (via ICU's
 * {@code CharsetDetector}) and rewrite UTF-8 text files in GB2312.
 *
 * <p>All methods report progress on standard output. Conversion replaces the
 * original file, so run it on a copy or under version control.
 */
public class UTF8ToGB2312ConverterLocal {

    /** Name of the target encoding, as accepted by {@link Charset#forName(String)}. */
    private static final String GB2312_NAME = "GB2312";

    /** Detector result that marks a file as a conversion candidate. */
    private static final String UTF8_NAME = "UTF-8";

    /** File name that is never converted, whatever its detected encoding. */
    private static final String SKIPPED_FILE_NAME = "application.yml";

    /** Folder scanned by {@link #main(String[])} when no argument is supplied. */
    private static final String DEFAULT_SCAN_ROOT = "D:\\Desktop\\server\\gray\\server_RERAT";

    /** Byte order mark as it appears after UTF-8 decoding; GB2312 cannot represent it. */
    private static final char BYTE_ORDER_MARK = '\uFEFF';

    /**
     * Rewrites {@code file} in GB2312 if its content is detected as UTF-8.
     *
     * <p>Files named {@code application.yml} and files whose detected charset
     * is not UTF-8 are left alone. The converted text is written to a
     * temporary file in the same directory and only then moved over the
     * original, so the original stays intact if anything fails or if the text
     * contains characters GB2312 cannot represent. I/O failures are printed,
     * not thrown.
     *
     * @param file the file to convert in place
     */
    public static void transform1(File file) {
        if (SKIPPED_FILE_NAME.equals(file.getName())) {
            return;
        }
        File tempFile = null;
        try {
            if (!UTF8_NAME.equals(detectCharset(file))) {
                return;
            }
            System.out.println(file.getName());

            // Same directory as the target so the final move stays on one file system.
            tempFile = File.createTempFile("gb2312-", ".tmp", file.getAbsoluteFile().getParentFile());
            if (!writeAsGb2312(file, tempFile)) {
                System.err.println("Not representable in GB2312, left unchanged: " + file.getAbsolutePath());
                return;
            }
            Files.move(tempFile.toPath(), file.toPath(), StandardCopyOption.REPLACE_EXISTING);
            tempFile = null; // The temporary file now is the converted original.
            System.out.println("ok!!!");
        } catch (IOException e) {
            e.printStackTrace();
        } finally {
            // Reached with a non-null tempFile only when the conversion did not complete.
            if (tempFile != null && !tempFile.delete()) {
                System.err.println("Could not delete temporary file: " + tempFile.getAbsolutePath());
            }
        }
    }

    /**
     * Copies {@code source} (read as UTF-8) to {@code target} (written as GB2312), line by line.
     *
     * <p>Lines are re-terminated with the platform line separator, so the
     * source's own line endings are not preserved and the output always ends
     * with a line terminator. A leading byte order mark is dropped.
     *
     * @return {@code true} if every line was written; {@code false} as soon as
     *         {@link #transform4(String)} rejects a line (the target is then incomplete)
     */
    private static boolean writeAsGb2312(File source, File target) throws IOException {
        Charset gb2312 = Charset.forName(GB2312_NAME);
        try (BufferedReader reader = new BufferedReader(
                     new InputStreamReader(new FileInputStream(source), StandardCharsets.UTF_8));
             BufferedWriter writer = new BufferedWriter(
                     new OutputStreamWriter(new FileOutputStream(target), gb2312))) {
            boolean firstLine = true;
            String line;
            while ((line = reader.readLine()) != null) {
                if (firstLine) {
                    firstLine = false;
                    if (!line.isEmpty() && line.charAt(0) == BYTE_ORDER_MARK) {
                        line = line.substring(1);
                    }
                }
                String converted = transform4(line);
                if (converted == null) {
                    return false;
                }
                writer.write(converted);
                writer.newLine();
            }
            return true;
        }
    }

    /**
     * Checks that {@code utf8String} survives encoding to GB2312 and returns
     * the text obtained by decoding those GB2312 bytes again.
     *
     * <p>On success both the input and the result are echoed to standard output.
     *
     * @param utf8String the text to check; must not be {@code null}
     * @return the round-tripped text, or {@code null} if the text contains
     *         characters that GB2312 cannot encode
     */
    public static String transform4(String utf8String) {
        Charset gb2312Charset = Charset.forName(GB2312_NAME);
        try {
            // A freshly created encoder throws on unmappable characters
            // instead of silently substituting a replacement byte.
            ByteBuffer gb2312ByteBuffer = gb2312Charset.newEncoder().encode(CharBuffer.wrap(utf8String));
            byte[] gb2312Bytes = new byte[gb2312ByteBuffer.remaining()];
            gb2312ByteBuffer.get(gb2312Bytes);
            String gb2312String = new String(gb2312Bytes, gb2312Charset);
            System.out.println("UTF-8: " + utf8String);
            System.out.println("GB2312: " + gb2312String);
            return gb2312String;
        } catch (CharacterCodingException e) {
            e.printStackTrace();
            return null;
        }
    }

    /**
     * Guesses the charset of {@code file} from its full content.
     *
     * @param file the file to inspect; it is read into memory completely
     * @return the name of the best-matching charset, or {@code null} when the
     *         detector reports no match
     * @throws IOException if the file cannot be read
     */
    public static String detectCharset(File file) throws IOException {
        CharsetDetector detector = new CharsetDetector();
        detector.setText(Files.readAllBytes(file.toPath()));
        CharsetMatch match = detector.detect();
        return match == null ? null : match.getName();
    }

    /**
     * Converts every regular file directly inside {@code folderPath}
     * (sub-folders are not entered) using {@link #transform1(File)}.
     *
     * @param folderPath path of the folder to process; nothing happens if it cannot be listed
     * @throws IOException declared for callers; conversion errors are handled per file
     */
    public static void transform(String folderPath) throws IOException {
        for (File file : listChildren(new File(folderPath))) {
            if (file.isFile()) {
                transform1(file);
            }
        }
    }

    /**
     * Prints, for every regular file directly inside {@code folderPath}, its
     * detected charset and whether that charset name contains {@code "GB"}.
     *
     * @param folderPath path of the folder to inspect; nothing happens if it cannot be listed
     * @throws IOException if a file cannot be read
     */
    public static void checkFilesCharset(String folderPath) throws IOException {
        for (File file : listChildren(new File(folderPath))) {
            if (!file.isFile()) {
                continue;
            }
            String charset = detectCharset(file);
            System.out.println(file.getName() + " is " + charset);
            if (isGbFamily(charset)) {
                System.out.println(file.getName() + " is GBK");
            } else {
                System.out.println(file.getName() + " is not GBK");
            }
        }
    }

    /**
     * Recursively converts every file below {@code folder} using
     * {@link #transform1(File)}. If {@code folder} is not a directory its
     * absolute path is printed and nothing else happens.
     *
     * @param folder root of the tree to convert
     * @throws FileNotFoundException declared for callers; conversion errors are handled per file
     */
    public static void traverseFolderTransform(File folder) throws FileNotFoundException {
        if (!folder.isDirectory()) {
            System.out.println(folder.getAbsolutePath());
            return;
        }
        for (File child : listChildren(folder)) {
            if (child.isDirectory()) {
                traverseFolderTransform(child);
            } else {
                transform1(child);
            }
        }
    }

    /**
     * Recursively prints the name of every file below {@code folder} whose
     * detected charset name contains {@code "GB"}. If {@code folder} is not a
     * directory its absolute path is printed and nothing else happens.
     *
     * @param folder root of the tree to inspect
     * @throws IOException if a file cannot be read
     */
    public static void traverseFolderDetect(File folder) throws IOException {
        if (!folder.isDirectory()) {
            System.out.println(folder.getAbsolutePath());
            return;
        }
        for (File child : listChildren(folder)) {
            if (child.isDirectory()) {
                traverseFolderDetect(child);
            } else if (isGbFamily(detectCharset(child))) {
                System.out.println(child.getName() + " is GBK");
            }
        }
    }

    /**
     * Reports GB-encoded files below a folder.
     *
     * @param args optional: {@code args[0]} is the folder to scan; the
     *             built-in default folder is used when it is absent
     * @throws IOException if a file cannot be read
     */
    public static void main(String[] args) throws IOException {
        String root = args.length > 0 ? args[0] : DEFAULT_SCAN_ROOT;
        traverseFolderDetect(new File(root));
    }

    /** Lists the entries of {@code folder}, or an empty array when it cannot be listed. */
    private static File[] listChildren(File folder) {
        File[] children = folder.listFiles();
        return children != null ? children : new File[0];
    }

    /** Tells whether a detected charset name (possibly {@code null}) contains {@code "GB"}. */
    private static boolean isGbFamily(String charset) {
        return charset != null && charset.contains("GB");
    }
}