1. 实验目的
- 了解Java语言中非线性数据结构的使用;
- 掌握在非线性数据结构基础上,设计与实现特定算法的能力。
实验题目1-使用哈夫曼树实现文本编码、解码程序
4.1分析指定的字符串,统计每个字符出现的频率,计算每个字符的权重,实现哈夫曼编码程序,计算出每个字符对应的二进制编码,并输出。
4.2 根据上一步计算得到的字符二进制编码,对给定的字符串进行编码/解码。
实验题目2
将接口定义SerDes.java拷贝到第二次实验题目工程中
3.1 编写程序,实现SerDes接口。完成对第二次题目中实现的数据结构对象的序列化与反序列化。
实验代码
结点:
public class HTNode implements Comparable<HTNode>,Serializable{
public enum Code{
ZERO('0'), ONE('1');
private char code;
private Code(char c){
this.code = c;
}
public char getCode(){
return code;
}
}
/**
* 哈夫曼树的叶子结点数据
*/
private char data;
/**
* 结点的编码,只有0和1两种可能
*/
private Code code;
public static final char zero = '0';
public static final char one = '1';
String huffmanCode="";
/**
* @return 返回 huffmanCode 属性的值
*/
public String getHuffmanCode() {
return huffmanCode;
}
/**
* @param huffmanCode 将属性huffmanCode的值设置为huffmanCode
*/
public void setHuffmanCode(String huffmanCode) {
this.huffmanCode = huffmanCode;
}
private double weight;
private HTNode lchild;
private HTNode rchild;
private boolean isLeaf;
public char getData() {
return data;
}
public void setData(char data) {
this.data = data;
}
public double getWeight() {
return weight;
}
public void setWeight(double weight) {
this.weight = weight;
}
public HTNode getLchild() {
return lchild;
}
public void setLchild(HTNode lchild) {
this.lchild = lchild;
}
public HTNode getRchild() {
return rchild;
}
public void setRchild(HTNode rchild) {
this.rchild = rchild;
}
public boolean isLeaf() {
return isLeaf;
}
public void setLeaf(boolean isLeaf) {
this.isLeaf = isLeaf;
}
public Code getCode() {
return code;
}
public void setCode(Code code) {
this.code = code;
}
@Override
public int compareTo(HTNode o) {
if(this.weight<o.weight){
return -1;
}else{
return 1;
}
}
}
接口实现:
/**
 * {@code SerDes} implementation backed by Java's built-in object
 * serialization ({@code ObjectOutputStream} / {@code ObjectInputStream}).
 *
 * <p>Failures are reported on standard error via {@code printStackTrace()};
 * the methods then return {@code null} / {@code false} (or, for
 * {@link #serBin(Serializable)}, whatever bytes were produced so far).
 * The text-based variants are not implemented and return {@code null}.
 */
public class HTSerDes implements SerDes {

    /**
     * Serializes an object into a byte array.
     *
     * @param t the object to serialize
     * @return the serialized bytes; incomplete if an I/O error occurred
     */
    @Override
    public byte[] serBin(Serializable t) {
        ByteArrayOutputStream buffer = new ByteArrayOutputStream();
        // Closing the object stream flushes everything into the buffer.
        try (ObjectOutputStream out = new ObjectOutputStream(buffer)) {
            out.writeObject(t);
        } catch (IOException e) {
            e.printStackTrace();
        }
        return buffer.toByteArray();
    }

    /**
     * Text serialization is not implemented.
     *
     * @param t ignored
     * @return always {@code null}
     */
    @Override
    public String serTxt(Serializable t) {
        return null;
    }

    /**
     * Deserializes an object from a byte array.
     *
     * <p>NOTE(review): Java native deserialization must only be used on
     * trusted input.
     *
     * @param bin bytes previously produced by {@link #serBin(Serializable)}
     * @return the deserialized object, or {@code null} if {@code bin} is
     *         {@code null} or cannot be deserialized
     */
    @Override
    public Serializable des(byte[] bin) {
        if (bin == null) {
            return null;
        }
        try (ObjectInputStream input = new ObjectInputStream(new ByteArrayInputStream(bin))) {
            return (Serializable) input.readObject();
        } catch (Exception e) {
            // Broad on purpose: any failure here means "could not deserialize".
            e.printStackTrace();
            return null;
        }
    }

    /**
     * Text deserialization is not implemented.
     *
     * @param text ignored
     * @return always {@code null}
     */
    @Override
    public Serializable des(String text) {
        return null;
    }

    /**
     * Serializes an object and writes it to a file.
     *
     * @param t    the object to serialize
     * @param path directory of the target file; {@code null} or empty means
     *             {@code file} is used as given
     * @param file name of the target file
     * @return {@code true} if the bytes were written, {@code false} if the
     *         file could not be opened or written
     */
    @Override
    public boolean serToFile(Serializable t, String path, String file) {
        byte[] bytes = this.serBin(t);
        try (FileOutputStream out = new FileOutputStream(locate(path, file))) {
            out.write(bytes);
            return true;
        } catch (IOException e) {
            e.printStackTrace();
            return false;
        }
    }

    /**
     * Reads a file completely and deserializes its content.
     *
     * @param path directory of the source file; {@code null} or empty means
     *             {@code file} is used as given
     * @param file name of the source file
     * @return the deserialized object, or {@code null} if the file cannot be
     *         read or its content cannot be deserialized
     */
    @Override
    public Serializable desFromFile(String path, String file) {
        ByteArrayOutputStream content = new ByteArrayOutputStream();
        try (BufferedInputStream in = new BufferedInputStream(new FileInputStream(locate(path, file)))) {
            byte[] chunk = new byte[1024];
            int count;
            while ((count = in.read(chunk)) != -1) {
                // Copy only the bytes actually read, not the whole chunk.
                content.write(chunk, 0, count);
            }
        } catch (IOException e) {
            e.printStackTrace();
            return null;
        }
        return this.des(content.toByteArray());
    }

    /**
     * Combines directory and file name. An empty directory is handled
     * explicitly because {@code new File("", name)} would resolve against the
     * system default directory instead of the working directory.
     */
    private static java.io.File locate(String path, String file) {
        if (path == null || path.isEmpty()) {
            return new java.io.File(file);
        }
        return new java.io.File(path, file);
    }
}
接口:
/**
 * Serialization / deserialization contract with six methods:
 * <ul>
 *   <li>{@code byte[] serBin(T t)}: serialize an object into a byte array</li>
 *   <li>{@code String serTxt(T t)}: serialize an object into a string (for
 *       example by serializing with {@code serBin} and Base64-encoding the
 *       bytes)</li>
 *   <li>{@code T des(byte[] bin)}: deserialize an object from a byte array</li>
 *   <li>{@code T des(String text)}: deserialize an object from a string</li>
 *   <li>{@code serToFile}: serialize an object and write it to a file</li>
 *   <li>{@code desFromFile}: read a serialized object back from a file</li>
 * </ul>
 *
 * @param <T> the serializable type handled by an implementation
 */
public interface SerDes<T extends Serializable> {
    /**
     * Serializes object {@code t} into a byte array.
     *
     * @param t the object to serialize
     * @return the serialized bytes
     */
    public byte[] serBin(T t);

    /**
     * Serializes object {@code t} into a string.
     * Hint: serialize with {@link #serBin} first, then Base64-encode the
     * resulting bytes.
     *
     * @param t the object to serialize
     * @return the serialized text
     */
    public String serTxt(T t);

    /**
     * Deserializes an object from a serialized byte array.
     *
     * @param bin the serialized bytes
     * @return the deserialized object
     */
    public T des(byte[] bin);

    /**
     * Deserializes an object from a serialized string. The string is expected
     * to have been produced by {@link #serTxt}.
     *
     * @param text the serialized text
     * @return the deserialized object
     */
    public T des(String text);

    /**
     * Serializes an object and writes it to a file on disk.
     * Hint: serialize with {@link #serBin}, then write the bytes to the file.
     *
     * @param t    the object to serialize
     * @param path directory of the target file
     * @param file name of the target file
     * @return whether the operation succeeded
     */
    public boolean serToFile(T t, String path, String file);

    /**
     * Reads a serialized object back from a file on disk.
     * Hint: read the bytes from the file, then deserialize them with
     * {@link #des(byte[])}.
     *
     * @param path directory of the source file
     * @param file name of the source file
     * @return the deserialized object
     */
    public T desFromFile(String path, String file);
}
Huffman树实现:
/**
 * Huffman tree: builds the tree from weighted leaf nodes, derives the 0/1
 * code table and encodes/decodes text with it.
 *
 * <p>The code table belongs to the instance (it is not shared between
 * trees). It is not serialized; after deserialization it is rebuilt from the
 * tree the first time it is needed.
 */
public class HuffmanTree implements Serializable {

    private static final long serialVersionUID = -5688613168418556504L;

    /** Code table of this tree; {@code null} until it has been generated. */
    private transient Map<Character, String> code = null;

    /** Root node of the generated Huffman tree. */
    private HTNode tree = null;

    /**
     * Builds the Huffman tree from the given nodes, stores its root in this
     * object and regenerates this object's code table. May be called
     * repeatedly; every call replaces the tree and the codes.
     *
     * <p>The passed list is consumed: when the method returns it contains
     * only the root node.
     *
     * @param nodes the initial (leaf) nodes; must contain at least one node
     * @return the root of the built tree
     * @throws IllegalArgumentException if {@code nodes} is null or empty
     */
    public HTNode buildTree(List<HTNode> nodes) {
        if (nodes == null || nodes.isEmpty()) {
            throw new IllegalArgumentException("at least one node is required to build a Huffman tree");
        }
        while (nodes.size() > 1) {
            HTNode lightest = removeLightest(nodes);
            HTNode secondLightest = removeLightest(nodes);
            HTNode parent = new HTNode();
            parent.setLchild(lightest);
            parent.setRchild(secondLightest);
            parent.setWeight(lightest.getWeight() + secondLightest.getWeight());
            parent.setLeaf(false); // merged nodes are never leaves
            nodes.add(parent);
        }
        this.tree = nodes.get(0);
        this.code = collectCodes(this.tree);
        return tree;
    }

    /**
     * Removes and returns the node with the smallest weight; among equal
     * weights the earliest node in the list wins.
     */
    private static HTNode removeLightest(List<HTNode> nodes) {
        int lightestIndex = -1;
        double lightestWeight = 0;
        int index = 0;
        for (HTNode candidate : nodes) {
            if (lightestIndex < 0 || candidate.getWeight() < lightestWeight) {
                lightestIndex = index;
                lightestWeight = candidate.getWeight();
            }
            index++;
        }
        return nodes.remove(lightestIndex);
    }

    /**
     * Walks the tree breadth-first, assigns every child the code of its
     * parent plus "0" (left) or "1" (right) and collects the codes of all
     * leaves. A tree consisting of a single leaf with an empty code is given
     * the code "0" so that encoding it does not produce an empty string.
     */
    private static Map<Character, String> collectCodes(HTNode root) {
        Map<Character, String> table = new HashMap<Character, String>();
        if (root == null) {
            return table;
        }
        if (root.isLeaf()) {
            String own = root.getHuffmanCode();
            table.put(root.getData(), (own == null || own.isEmpty()) ? "0" : own);
            return table;
        }
        LinkedList<HTNode> pending = new LinkedList<HTNode>();
        pending.add(root);
        while (!pending.isEmpty()) {
            HTNode node = pending.removeFirst();
            if (node.isLeaf()) {
                table.put(node.getData(), node.getHuffmanCode());
                continue;
            }
            HTNode left = node.getLchild();
            if (left != null) {
                left.setHuffmanCode(node.getHuffmanCode() + "0");
                pending.add(left);
            }
            HTNode right = node.getRchild();
            if (right != null) {
                right.setHuffmanCode(node.getHuffmanCode() + "1");
                pending.add(right);
            }
        }
        return table;
    }

    /**
     * Generates the character codes for the tree rooted at the given node.
     * Codes are strings of '0' and '1'. The huffmanCode property of the
     * visited child nodes is updated as a side effect.
     *
     * @param tree root of a Huffman tree; {@code null} yields an empty map
     * @return a new map from character to code
     */
    public static Map<Character, String> getCode(HTNode tree) {
        return collectCodes(tree);
    }

    /**
     * Regenerates and returns the character codes of this object's tree.
     * Codes are strings of '0' and '1'.
     *
     * @return the code table of this tree; empty if no tree has been built
     */
    public Map<Character, String> getCode() {
        code = collectCodes(this.tree);
        return code;
    }

    /**
     * Counts how often each character occurs in a string.
     *
     * @param text the text to analyse
     * @return a map from character to its number of occurrences
     */
    public static Map<Character, Integer> computeCharCount(String text) {
        Map<Character, Integer> counts = new HashMap<Character, Integer>();
        for (int i = 0; i < text.length(); i++) {
            char c = text.charAt(i);
            Integer seen = counts.get(c);
            counts.put(c, seen == null ? 1 : seen + 1);
        }
        return counts;
    }

    /**
     * Encodes text with the code table of this tree. Characters without a
     * code are skipped.
     *
     * @param text the text to encode
     * @return the concatenated 0/1 codes
     * @throws IllegalStateException if no tree is available
     */
    public String encode(String text) {
        if (tree == null) {
            throw new IllegalStateException("no Huffman tree available; call buildTree first");
        }
        if (code == null) {
            // e.g. right after deserialization: the table is not serialized
            code = collectCodes(tree);
        }
        return encode(text, code);
    }

    /**
     * Encodes text with the given code table. Characters without a code are
     * skipped.
     *
     * @param text the text to encode
     * @param code map from character to its 0/1 code
     * @return the concatenated 0/1 codes
     */
    public static String encode(String text, Map<Character, String> code) {
        StringBuilder bits = new StringBuilder();
        for (int i = 0; i < text.length(); i++) {
            String symbolCode = code.get(text.charAt(i));
            if (symbolCode != null) {
                bits.append(symbolCode);
            }
        }
        return bits.toString();
    }

    /**
     * Decodes an encoded text with this object's tree. The tree itself is
     * not modified.
     *
     * @param text the 0/1 string to decode
     * @return the decoded text
     * @throws IllegalStateException if no tree is available
     */
    public String decode(String text) {
        if (tree == null) {
            throw new IllegalStateException("no Huffman tree available; call buildTree first");
        }
        return decode(text, tree);
    }

    public HTNode getTree() {
        return tree;
    }

    /**
     * Decodes an encoded text with the given, previously built Huffman tree.
     *
     * <p>'0' selects the left child; any other character selects the right
     * child. Trailing bits that do not reach a leaf are ignored. If the root
     * itself is a leaf, every input character decodes to that leaf's symbol.
     *
     * @param text the 0/1 string to decode
     * @param tree root of the Huffman tree to decode with
     * @return the decoded text
     * @throws IllegalArgumentException if {@code tree} is null, or if the
     *         walk reaches a missing child of a non-leaf node
     */
    public String decode(String text, HTNode tree) {
        if (tree == null) {
            throw new IllegalArgumentException("a Huffman tree is required for decoding");
        }
        StringBuilder plain = new StringBuilder();
        HTNode cursor = tree;
        for (int i = 0; i < text.length(); i++) {
            if (!cursor.isLeaf()) {
                cursor = text.charAt(i) == '0' ? cursor.getLchild() : cursor.getRchild();
                if (cursor == null) {
                    throw new IllegalArgumentException(
                            "encoded text does not match the tree at position " + i);
                }
            }
            if (cursor.isLeaf()) {
                plain.append(cursor.getData());
                cursor = tree; // restart at the root for the next symbol
            }
        }
        return plain.toString();
    }
}
测试:
/**
 * Manual test driver: builds a Huffman tree from a sample text, prints the
 * code table and its statistics, round-trips a short string through
 * encode/decode, and repeats the check on a tree that was written to and
 * read back from the file "htree".
 */
public class TestMain {
    /** Sample text used to derive character frequencies. */
    private static final String data = "In computer science and information theory, "
            + "a Huffman code is a particular type of optimal prefix code that is commonly used for lossless data compression. "
            + "The process of finding and/or using such a code proceeds by means of Huffman coding, "
            + "an algorithm developed by David A. Huffman while he was a Ph.D. student at MIT, and published in the 1952 paper "
            + "\"A Method for the Construction of Minimum-Redundancy Codes\".[1] "
            + "The output from Huffman's algorithm can be viewed as a variable-length code table for encoding a source symbol "
            + "(such as a character in a file). The algorithm derives this table from the estimated probability or frequency of occurrence"
            + " (weight) for each possible value of the source symbol. As in other entropy encoding methods, more common symbols are generally "
            + "represented using fewer bits than less common symbols. Huffman's method can be efficiently implemented, "
            + "finding a code in linear time to the number of input weights if these weights are sorted.[2] However, "
            + "although optimal among methods encoding symbols separately, Huffman coding is not always optimal among all compression methods.";

    /** Creates one leaf node per character, weighted by its occurrence count. */
    private static ArrayList<HTNode> buildLeaves(Map<Character, Integer> chars) {
        ArrayList<HTNode> nodes = new ArrayList<>();
        for (Map.Entry<Character, Integer> entry : chars.entrySet()) {
            HTNode node = new HTNode();
            node.setData(entry.getKey());
            node.setWeight(entry.getValue());
            node.setLchild(null);
            node.setRchild(null);
            node.setLeaf(true); // initial nodes are leaves; merged nodes are not
            nodes.add(node);
        }
        return nodes;
    }

    /**
     * Prints the code table, the weighted code length and the entropy of the
     * character distribution, then encodes and decodes a short string.
     *
     * @param htree  the tree to exercise
     * @param chars  occurrence count of every character of the sample text
     * @param length total number of characters of the sample text
     */
    private static void report(HuffmanTree htree, Map<Character, Integer> chars, long length) {
        Map<Character, String> code = htree.getCode();
        double weightedLength = 0; // weighted length of the Huffman code, in bits
        double theoryWeightedLength = 0; // sum of p * log2(p); negated below to give the entropy
        for (Map.Entry<Character, String> entry : code.entrySet()) {
            Character c = entry.getKey();
            System.out.println("字符'" + c + "'的哈夫曼编码:" + entry.getValue());
            Integer count = chars.get(c);
            if (count == null) {
                continue; // character of the tree that does not occur in the sample text
            }
            double p = (count * 1.0) / length;
            weightedLength += entry.getValue().length() * p;
            // Base-2 logarithm so the result is in bits, like the code length.
            theoryWeightedLength += p * (Math.log(p) / Math.log(2));
        }
        System.out.println(weightedLength);
        System.out.println(-theoryWeightedLength);
        String text = "abcd";
        String coded = htree.encode(text);
        System.out.println("字符串:" + text);
        System.out.println("被编码为:" + coded);
        String oriText = htree.decode(coded);
        System.out.println("编码:" + coded);
        System.out.println("被解码为:" + oriText);
        System.out.println(oriText.equals(text));
        System.out.println("编码后二进制字节长度:" + (1.0 * coded.length()) / 8);
        System.out.println("原字符串长度" + text.length());
    }

    /** Builds a tree from the sample text and reports on it. */
    static void testBasic() {
        HuffmanTree htree = new HuffmanTree();
        // count the occurrences of every character first
        Map<Character, Integer> chars = HuffmanTree.computeCharCount(data);
        htree.buildTree(buildLeaves(chars));
        report(htree, chars, data.length());
    }

    /** Builds a tree from the sample text and writes it to the file "htree". */
    static void testSer() {
        HuffmanTree htree = new HuffmanTree();
        Map<Character, Integer> chars = HuffmanTree.computeCharCount(data);
        htree.buildTree(buildLeaves(chars));
        HTSerDes serde = new HTSerDes();
        if (!serde.serToFile(htree, "", "htree")) {
            throw new IllegalStateException("could not write the Huffman tree to file 'htree'");
        }
    }

    /** Reads the tree back from the file "htree" and reports on it. */
    static void testDes() {
        Map<Character, Integer> chars = HuffmanTree.computeCharCount(data);
        HTSerDes serde = new HTSerDes();
        HuffmanTree htree = (HuffmanTree) serde.desFromFile("", "htree");
        if (htree == null) {
            throw new IllegalStateException("could not read the Huffman tree from file 'htree'");
        }
        report(htree, chars, data.length());
    }

    /**
     * Runs the basic, serialization and deserialization tests in order.
     *
     * @param args unused
     * @throws Exception if a test fails with an exception
     */
    public static void main(String[] args) throws Exception {
        TestMain.testBasic(); // basic test (lab 2)
        System.out.println("基本测试成功");
        TestMain.testSer(); // serialization test (lab 3)
        System.out.println("\n序列化成功");
        System.out.println("\n\n");
        System.out.println("反序列化测试");
        TestMain.testDes(); // deserialization test (lab 3)
    }
}