本人并非信息安全方向的学生,在课程中遇到了算术编码,作业要求使用算术编码等方法完成一个简单功能。发现CSDN上相关的应用示例不是很丰富,所以自己写了一个demo,能够把英文文章转化成数字的形式,可以用来简单地加密一些敏感内容。
1.resources下面有三个txt文档:test是需要加密的文件;test_1是用来统计字符频率、生成编码区间表(map)的文件,这两个文件可以是任意的英文文档;test_arthmetic是加密结果。
2.直接上代码,arthmetic_demo方法中,首先生成了一个map,这个map的用途可以看其他文章的解释,接下来调用了getEncodingForOneArticle、getDncodingForOneArticle方法,分别对一篇文章进行加密解密。这里提到的三个方法都可以看作工具方法,真正的算术加密并不在这里实现。
对文章加密时,需要把某一个单词转化为一个数字,这时用到了getEncodingForOneWord、getDecodingForOneWord两个方法,算术编码的实现就在这两个方法中。
package arthmetic;
import java.io.*;
import java.nio.charset.StandardCharsets;
import java.util.HashMap;
import java.util.LinkedHashMap;
import java.util.Map;
import java.util.Optional;
import java.util.TreeMap;
/**
 * Demo of per-word arithmetic coding.
 *
 * <p>A character-frequency model is built from one text file; every
 * whitespace-separated word of a second file is then mapped to a token of the
 * form {@code <code>@<length>}, where {@code <code>} is the midpoint of the
 * word's final coding interval and {@code <length>} is the number of characters
 * in the word. The tokens are written to a third file, which is finally decoded
 * back and printed to standard output.
 *
 * <p>All interval arithmetic is done in {@code double}, so a word whose interval
 * becomes too narrow to be represented cannot be decoded completely. Characters
 * are lower-cased before they are looked up, so decoded text is lower case.
 */
public class Arthmetic {

    /** Directory that holds the demo's text files, relative to the working directory. */
    private static final File RESOURCE_DIR = new File(new File("src", "main"), "resources");

    /**
     * Runs the demo on the three fixed file names and prints the size in bytes
     * of the encoded output file.
     *
     * @param args ignored
     */
    public static void main(String[] args) {
        String path1 = "test_1.txt";
        String path2 = "test.txt";
        String path3 = "test_arthmetic.txt";
        arthmetic_demo(path1, path2, path3);
        System.out.println(resourceFile(path3).length());
    }

    /**
     * Builds the model from {@code path1}, encodes {@code path2} into
     * {@code path3}, then decodes {@code path3} to standard output.
     *
     * @param path1 name of the file the frequency model is built from
     * @param path2 name of the file to encode
     * @param path3 name of the file that receives the encoded tokens
     */
    private static void arthmetic_demo(String path1, String path2, String path3) {
        Map<String, double[]> range_map = getRangeMap(path1);
        getEncodingForOneArticle(path2, path3, range_map);
        getDncodingForOneArticle(path3, range_map);
    }

    /** Resolves a file name inside the resource directory using the platform's separator. */
    private static File resourceFile(String name) {
        return new File(RESOURCE_DIR, name);
    }

    /** Opens a resource file for buffered UTF-8 reading. */
    private static BufferedReader openReader(String name) throws IOException {
        return new BufferedReader(
                new InputStreamReader(new FileInputStream(resourceFile(name)), StandardCharsets.UTF_8));
    }

    /**
     * Decodes every {@code <code>@<length>} token of an encoded file and prints
     * the decoded words, one output line per input line.
     *
     * <p>Failures are reported on standard error and end the decoding.
     *
     * @param path3     name of the encoded file inside the resource directory
     * @param range_map symbol to {@code {low, high}} interval model
     */
    private static void getDncodingForOneArticle(String path3, Map<String, double[]> range_map) {
        try (BufferedReader br = openReader(path3)) {
            String lineTxt;
            while ((lineTxt = br.readLine()) != null) {
                // Tokens are separated by whitespace.
                for (String token : lineTxt.split("\\s+")) {
                    if (token.isEmpty()) {
                        continue; // blank line or leading whitespace
                    }
                    int at = token.lastIndexOf('@');
                    if (at < 0) {
                        throw new IllegalArgumentException("Malformed token (expected code@length): " + token);
                    }
                    double code = Double.parseDouble(token.substring(0, at));
                    int length = Integer.parseInt(token.substring(at + 1));
                    System.out.print(getDecodingForOneWord(code, range_map, length) + " ");
                }
                System.out.println();
            }
        } catch (IOException | RuntimeException e) {
            System.err.println("read errors :" + e);
        }
    }

    /**
     * Encodes every whitespace-separated word of a text file and writes the
     * resulting {@code <code>@<length>} tokens, one output line per input line.
     *
     * <p>Failures (including a word containing a symbol that is missing from the
     * model) are reported on standard error and end the encoding; both streams
     * are closed in every case, so output produced so far is flushed.
     *
     * @param path2     name of the file to encode inside the resource directory
     * @param path3     name of the output file inside the resource directory
     * @param range_map symbol to {@code {low, high}} interval model
     */
    private static void getEncodingForOneArticle(String path2, String path3, Map<String, double[]> range_map) {
        try (BufferedReader br = openReader(path2);
             BufferedWriter bw = new BufferedWriter(
                     new OutputStreamWriter(new FileOutputStream(resourceFile(path3)), StandardCharsets.UTF_8))) {
            String lineTxt;
            while ((lineTxt = br.readLine()) != null) {
                // Words are separated by whitespace.
                for (String word : lineTxt.split("\\s+")) {
                    if (word.isEmpty()) {
                        continue; // blank line or leading whitespace: nothing to encode
                    }
                    bw.write(getEncodingForOneWord(word, range_map) + "@" + word.length() + " ");
                }
                bw.newLine();
            }
        } catch (IOException | RuntimeException e) {
            System.err.println("read errors :" + e);
        }
    }

    /**
     * Decodes one word from its arithmetic code.
     *
     * <p>Starting from the interval {@code [0, 1]}, each step looks for the symbol
     * whose sub-interval strictly contains {@code encoding}, appends it and
     * narrows the interval to that sub-interval. The arithmetic mirrors
     * {@link #getEncodingForOneWord} operation for operation.
     *
     * @param encoding  code produced by the encoder
     * @param range_map symbol to {@code {low, high}} interval model
     * @param length    number of symbols to decode
     * @return the decoded word; if at some step no sub-interval contains
     *         {@code encoding}, decoding stops and the symbols decoded so far are
     *         returned. A {@code length} of zero or less yields the empty string.
     */
    private static String getDecodingForOneWord(double encoding, Map<String, double[]> range_map, int length) {
        StringBuilder decoded = new StringBuilder();
        double low = 0.0;
        double range = 1.0;
        for (int i = 0; i < length; i++) {
            String symbol = null;
            double nextLow = 0.0;
            double nextHigh = 0.0;
            for (Map.Entry<String, double[]> entry : range_map.entrySet()) {
                double candidateLow = low + range * entry.getValue()[0];
                double candidateHigh = low + range * entry.getValue()[1];
                if (encoding > candidateLow && encoding < candidateHigh) {
                    symbol = entry.getKey();
                    nextLow = candidateLow;
                    nextHigh = candidateHigh;
                    break;
                }
            }
            if (symbol == null) {
                // The interval did not change, so later steps could not match either.
                break;
            }
            decoded.append(symbol);
            low = nextLow;
            range = nextHigh - nextLow;
        }
        return decoded.toString();
    }

    /**
     * Encodes one word as the midpoint of its final coding interval.
     *
     * <p>Each character is lower-cased and then narrows the current interval to
     * the sub-interval the model assigns to it.
     *
     * @param source    word to encode
     * @param range_map symbol to {@code {low, high}} interval model
     * @return the midpoint of the final interval as a decimal string
     *         ({@code "0.5"} for an empty word)
     * @throws IllegalArgumentException if a character of {@code source} has no
     *         interval in {@code range_map}
     */
    private static String getEncodingForOneWord(String source, Map<String, double[]> range_map) {
        double low = 0.0;
        double high = 1.0;
        for (char c : source.toCharArray()) {
            String symbol = String.valueOf(Character.toLowerCase(c));
            double[] interval = range_map.get(symbol);
            if (interval == null) {
                throw new IllegalArgumentException(
                        "No coding interval for symbol '" + symbol + "' in word \"" + source + "\"");
            }
            double range = high - low;
            double start = low;
            low = start + range * interval[0];
            high = start + range * interval[1];
        }
        return String.valueOf((low + high) / 2);
    }

    /**
     * Builds the coding model from a text file: every lower-cased character is
     * assigned a sub-interval of {@code [0, 1]} whose width is the character's
     * relative frequency in the file.
     *
     * <p>Characters are laid out in sorted order, so the same file always yields
     * the same intervals. A read failure is reported on standard error and the
     * model is built from whatever was counted up to that point.
     *
     * @param path1 name of the model file inside the resource directory
     * @return map from single-character symbol to its {@code {low, high}} interval
     */
    private static Map<String, double[]> getRangeMap(String path1) {
        // Sorted keys give an interval layout that does not depend on hash iteration order.
        Map<String, Integer> counts = new TreeMap<String, Integer>();
        try (BufferedReader br = openReader(path1)) {
            String lineTxt;
            while ((lineTxt = br.readLine()) != null) {
                for (char c : lineTxt.toCharArray()) {
                    counts.merge(String.valueOf(Character.toLowerCase(c)), 1, Integer::sum);
                }
            }
        } catch (IOException | RuntimeException e) {
            System.err.println("read errors :" + e);
        }
        int sum = 0;
        for (int count : counts.values()) {
            sum += count;
        }
        Map<String, double[]> range_map = new LinkedHashMap<String, double[]>();
        double low = 0;
        for (Map.Entry<String, Integer> entry : counts.entrySet()) {
            double width = (entry.getValue() * 1.0) / sum;
            range_map.put(entry.getKey(), new double[]{low, low + width});
            low += width;
        }
        return range_map;
    }
}
3.运行结果
加密后,能够看到0.413326042805926@3的结果,@之前的数字表示单词的编码结果,@之后的数字表示单词长度,进行译码时,这两部分都是必不可少的。