用Java实现二进制文件的分割与合并,大小文本文件的多样化分割与合并,文件排序算法:按自然语言排序
https://pan.baidu.com/s/1Lhzb6dwZOJ1Yy5Lu0ELQTg?pwd=9qze
# 1. 代码
Main.java
import android.support.annotation.NonNull;
import java.util.Arrays;
import java.util.List;
import bin.mt.plugin.api.translation.BaseTranslationEngine;
import java.io.File;
import java.io.FileWriter;
import java.io.IOException;
import java.io.FileReader;
import java.io.BufferedReader;
import java.util.ArrayList;
import java.util.Collections;
import java.util.Comparator;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import org.json.JSONArray;
import org.json.JSONObject;
import android.content.SharedPreferences;
import java.io.*;
public class Main extends BaseTranslationEngine {
/**
 * Creates the engine and hands the superclass a configuration built with
 * {@code ConfigurationBuilder}, with the force-not-to-skip-translated flag
 * set to {@code true}.
 */
public Main() {
super(new ConfigurationBuilder()
// Turn off "skip already-translated entries" (per the original author's note)
.setForceNotToSkipTranslated(true).build());
}
//分割,参数说明:需分割文件的全路径(输入),切割的文件大小(设置),切割后的文件单位默认为MB(设置),分割后文件存放的文件夹全路径,待分割文件格式
public void Split(String SrcFilePath, long SingleGoalFileSize, String SingleFileUnit, String GoalFileDirectory,
int format) {
//SingleGoalFileSize 文件大小 SingleFileUnit 文件单位默认单位:MB ,分割后文件路径和目录(省略时为当前文件夹), 默认各类文件有自己的分割方式
SharedPreferences preferences = getContext().getPreferences();
String op = preferences.getString("operate", "");//分割方式
String name_flag = preferences.getString("name_flag", "-🆎标识");
String[] nf = new String[] { "", "" };//批处理文件在该字符数组之间嵌入序号为标识符
int ab = name_flag.lastIndexOf("🆎");//根据最后一个🆎来识别
if (ab == -1) {
if (name_flag.equals("")) {
nf[0] = "-";
nf[1] = "标识";
} else {
nf[0] = name_flag;
}
} else {
nf[0] = name_flag.substring(0, ab);
nf[1] = name_flag.substring(ab).replace("🆎", "");
}
if (SrcFilePath.equals("") || SrcFilePath == null) {
System.out.println("分割失败!");
return;
}
File SrcFile = new File(SrcFilePath); //新建文件
long SrcFileSize = SrcFile.length();//源文件的大小
long SingleFileSize = 1;//分割后的单个文件大小(以字节为单位)
int unit = 1;//分割后的单个文件单位
//SingleFileUnit=SingleFileUnit.toUpperCase();
switch (SingleFileUnit) {
case "B":
SingleFileSize = SingleGoalFileSize;
unit = (int) SingleGoalFileSize;
break;
case "KB":
SingleFileSize = 1024 * SingleGoalFileSize;
if (SingleGoalFileSize > 2048)
unit = 1024 * 1024;
else
unit = (int) SingleFileSize;
break;
case "GB":
unit = 1024 * 1024;
SingleFileSize = unit * 1024 * SingleGoalFileSize;
unit *= 10;
break;
default:
unit = 1024 * 1024;
SingleFileSize = unit * SingleGoalFileSize;//默认单位MB
}
int GoalFileNum = (int) (SrcFileSize / SingleFileSize); //获取分割后文件的数量
GoalFileNum = SrcFileSize % SingleFileSize == 0 ? GoalFileNum : GoalFileNum + 1; //计算总的文件大小
int x1 = SrcFilePath.lastIndexOf("/"); //获取文件路径的分隔符位置
int x2 = SrcFilePath.lastIndexOf("."); //获取文件的后缀位置
String SrcFileName, fc = "";
if (x2 < x1)
SrcFileName = SrcFilePath.substring(x1 + 1);
else {
fc = SrcFilePath.substring(x2);
SrcFileName = SrcFilePath.substring(x1 + 1, x2); //截取文件名,如果要分割的文件没有后缀,则GoalFileDirectory参数不能为空,因为在文件位置无法新建同名文件夹
}
if (GoalFileDirectory == null || GoalFileDirectory.equals("")) {
GoalFileDirectory = SrcFilePath.substring(0, x1);
}
File goalDirectory = new File(GoalFileDirectory + File.separator + SrcFileName + File.separator);
if (!goalDirectory.exists()) {
goalDirectory.mkdirs();
}
GoalFileDirectory = GoalFileDirectory + File.separator + SrcFileName + File.separator + SrcFileName + nf[0];//File.separator 的作用相当于 ' \ '所以用 File.separator 保证了在任何系统下不会出错。
try {
FileInputStream fis = new FileInputStream(SrcFilePath); //新建输入流对象
//String bm = fc.matches("\\.apk|\\.zip|\\.rar|\\.7z|\\.tar|\\.gz|\\.exe|\\.dll|\\.so|\\.dex|\\.mp3|\\.mp4|\\.jpg") ? "ISO-8859-1" : "UTF-8";
//注意根据文件编码格式指定InputStreamReader的编码
InputStreamReader reader = null;//new InputStreamReader(fis,"UTF-8");//默认UTF-8 GBK ASCII ISO-8859-1 Windows-1252
BufferedReader bufferedReader = null;//new BufferedReader(reader);
//FileOutputStream fos = null;//new FileOutputStream(CompleteSingleGoalFilePath);
//BufferedOutputStream bos = null;//new BufferedOutputStream(fos); //包装
String tem = null, line = null, CompleteSingleGoalFilePath;
StringBuilder textbatch = new StringBuilder();//.delete(0,sb.length());
byte bytes[];
if (op.equals("file") || (op.equals("fit") && format == 0)) {
bytes = new byte[unit];//每次读取文件的大小
int len = -1;
BufferedInputStream bis = new BufferedInputStream(fis);
FileOutputStream fos = null;
BufferedOutputStream bos = null;
for (int i = 0; i < GoalFileNum; i++) {
//分割后的单个文件完整路径名
CompleteSingleGoalFilePath = GoalFileDirectory + String.valueOf(i) + nf[1] + fc;
fos = new FileOutputStream(CompleteSingleGoalFilePath);
bos = new BufferedOutputStream(fos); //包装
long count = 0;
while ((len = bis.read(bytes)) != -1) {
bos.write(bytes, 0, len);//从源文件读取规定大小的字节数写入到单个目标文件中
count += len;
if (count >= SingleFileSize)
break;
}
bos.flush();
bos.close();
fos.close();
}
bis.close();
fis.close();
} else if (op.equals("json") || (op.equals("fit") && format == 3)) {
reader = new InputStreamReader(fis, "UTF-8");//默认UTF-8 GBK ASCII ISO-8859-1 Windows-1252
bufferedReader = new BufferedReader(reader);
FileWriter fw = null;
BufferedWriter bufw = null;
while ((line = bufferedReader.readLine()) != null) {
textbatch.append(line).append(System.getProperty("line.separator"));
}
JSONArray jsonArray = new JSONArray(textbatch.toString());
textbatch.delete(0, textbatch.length());
for (int i = 0; i < jsonArray.length(); i++) {
JSONObject jsonObject = jsonArray.getJSONObject(i);
CompleteSingleGoalFilePath = GoalFileDirectory + String.valueOf(i) + nf[1] + fc;
fw = new FileWriter(CompleteSingleGoalFilePath, false); //创建FileWriter类对象, true从尾部不覆盖
bufw = new BufferedWriter(fw); //创建BufferedWriter对象
bufw.write(jsonObject.toString());
bufw.flush(); //清空流
bufw.close(); //关闭BufferedWriter流
fw.close(); //关闭FileWriter流
}
bufferedReader.close();
reader.close();
} else {
reader = new InputStreamReader(fis, "UTF-8");//默认UTF-8 GBK ASCII ISO-8859-1 Windows-1252
bufferedReader = new BufferedReader(reader);
String remove = preferences.getString("Remover_regular", "");//正则查找🆎正则替换
String spliter = preferences.getString("spliter", "");//分割🈹0指令 换行符:\r|\n
//String[] rp = remove.indexOf("🆎")>0 ? remove.split("🆎", 2) : remove.split("$", 5);//split不可靠,空字符串不被解析,,,,即使大参数也不行
String[] rp = new String[] { "", "" };
ab = remove.lastIndexOf("🆎");//根据最后一个🆎来识别
if (ab != -1) {
if (ab == 0)
rp[0] = remove.replace("🆎", "");
else {
rp[0] = remove.substring(0, ab);
rp[1] = remove.substring(ab).replace("🆎", "");
//replaceAll()的参数是正则表达式,replaceAll("[\\t\\n\\r]", "");替换换行符,要替换成换行符String.replace("@","\t\n"),,前后参数不同!!!
rp[1] = rp[1].replaceAll("\\\\n", "\n").replaceAll("\\\\t", "\t");//将储存的\\n变成换行符
}
} else
rp[0] = remove;
//boolean flagsp = false;
//if(!spliter.equals("")) flagsp = true;
Pattern prp = Pattern.compile(spliter, Pattern.MULTILINE);//多行匹配^ $
Matcher mrp;
//如果文本匹配分割框输入了任意字符串,将开启匹配分割模式
if (spliter.equals("")) {
//文本文件按自定义字符串长度分割粗略