哈夫曼树的应用之一

写文章的背景:近期学习数据结构和算法,学到哈夫曼树,听老师讲完后,自己用 PHP 实现了基于哈夫曼编码的加密(压缩)和解密(解压),做下记录,供自己学习用。(写得不优雅,先实现,后期有时间再优化)

<?php
/**
 * Created by PhpStorm.
 * User: sxt
 * Date: 2021/6/25
 * Time: 9:37
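 * Compress ("encrypt") a string with the Huffman algorithm
 * 1. Count how often each character occurs and build the weight array, eg:[['key'=>'a','count'=>3],['key'=>'b','count'=>1]]
 * 2. Build the Huffman array (id,value,parent,l,r); the Huffman tree is stored in an array
 * 3. Build the table that maps each character to its Huffman code
 * 4. Encode (compress) the string using that table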
 */

// Demo 1: encode a sample sentence and dump the array returned by encrypt().
$str = 'This is an encryption algorithm written by Shan xiongtao using Huffman tree';
print_r(encrypt($str));

// Demo 2 (test fixture): decode a stored bit string with its weight table.
// NOTE(review): presumably both literals below are the encrypt() output for the sentence above - confirm.
$encryp_str = '011110110011111111001011111111001011001001101000100101111101000000110000011011111010100110110011000011110101010100111111011100101100101100010010011111101110100010011011000110000110111000011001100100110111000111110101001111011101100101011010110111100111100111101101000000110101110011100110010010011011101010000010001';
// Weight table as JSON text: one {"key","count"} object per distinct character.
$weight_arr = '[{"key":"T","count":1},{"key":"h","count":3},{"key":"i","count":7},{"key":"s","count":3},{"key":" ","count":11},{"key":"a","count":5},{"key":"n","count":8},{"key":"e","count":4},{"key":"c","count":1},{"key":"r","count":4},{"key":"y","count":2},{"key":"p","count":1},{"key":"t","count":6},{"key":"o","count":4},{"key":"l","count":1},{"key":"g","count":3},{"key":"m","count":2},{"key":"w","count":1},{"key":"b","count":1},{"key":"S","count":1},{"key":"x","count":1},{"key":"u","count":2},{"key":"H","count":1},{"key":"f","count":2}]';
// Second argument true: decode JSON objects into associative arrays, the shape decrypt() indexes.
$weight_arr = json_decode($weight_arr,true);

print_r(decrypt($encryp_str,$weight_arr));

/**
 * Compress a string into a Huffman bit string.
 *
 * The input is processed byte by byte (strlen() / $str[$i]). The weight table
 * comes from str_num(), the array-backed tree from hafuman_arr(), and the
 * per-symbol codes from str_code_arr().
 *
 * NOTE(review): because symbols are single bytes, multi-byte (e.g. UTF-8
 * Chinese) input puts partial characters into the weight table, and
 * json_encode() returns false for invalid UTF-8 - confirm whether non-ASCII
 * input has to be supported.
 *
 * @param string $str Text to encode.
 * @return array Empty array for empty input; otherwise
 *               ['str'        => the original text,
 *                'encryp_str' => string made of '0'/'1' characters,
 *                'weight_arr' => JSON-encoded weight table (needed to decode)].
 */
function encrypt($str)
{
    // Compare against '' explicitly: a plain truthiness test (!$str) would
    // also reject the valid one-character input "0".
    if (!is_scalar($str) || (string)$str === '') {
        return [];
    }
    $str = (string)$str;

    $weight_arr = str_num($str); // symbol => occurrence count, in first-seen order

    $encryp_str = '';
    // With fewer than two distinct symbols the tree has no branches and
    // str_code_arr() assigns no code (it only codes leaves that have a
    // parent), so there is nothing to look up: no bits are emitted and the
    // weight table alone (symbol + count) describes the message.
    if (count($weight_arr) > 1) {
        $hafuman_arr = hafuman_arr($weight_arr);     // array-backed Huffman tree
        $str_code_arr = str_code_arr($hafuman_arr);  // symbol => bit string

        $strlen = strlen($str);
        for ($i = 0; $i < $strlen; $i++) {
            $encryp_str .= $str_code_arr[$str[$i]];
        }
    }

    return [
        'str' => $str,
        'encryp_str' => $encryp_str,
        'weight_arr' => json_encode($weight_arr),
    ];
}

/**
 * Decode a Huffman bit string using the weight table it was encoded with.
 *
 * The tree is rebuilt from $weight_arr with hafuman_arr(), then walked from
 * the root: '0' follows the left child, any other character the right child.
 * Reaching a leaf appends its symbol and restarts at the root.
 *
 * @param string $encryp_str String of '0'/'1' characters to decode.
 * @param array  $weight_arr Weight table: list of ['key' => symbol, 'count' => occurrences].
 * @return string The decoded text, or a single space when there is nothing to decode.
 */
function decrypt($encryp_str,$weight_arr){
    if (empty($weight_arr)) {
        return ' ';
    }

    // One distinct symbol: the tree is a single leaf with no branches, so the
    // bits carry no information (walking the tree would never consume one and
    // loop forever). The weight table fully determines the message.
    if (count($weight_arr) === 1) {
        $only = reset($weight_arr);
        return str_repeat((string)$only['key'], max(0, (int)$only['count']));
    }

    // Compare against '' explicitly: the one-bit string "0" is falsy in PHP
    // but is a legitimate encoded input.
    $bits = (string)$encryp_str;
    if ($bits === '') {
        return ' ';
    }

    $hafuman_arr = hafuman_arr($weight_arr);              // array-backed Huffman tree
    $hafuman_arr_root = hafuman_arr_root($hafuman_arr);   // index of the root node

    $id = $hafuman_arr_root; // current node, start at the root
    $str = '';               // decoded output
    $bit_count = strlen($bits);

    // Advance with an index instead of shifting/re-indexing an array per bit.
    for ($pos = 0; $pos < $bit_count; $pos++) {
        $id = ($bits[$pos] === '0') ? $hafuman_arr[$id]['l'] : $hafuman_arr[$id]['r'];

        // A node with no children is a leaf: emit its symbol and restart.
        if ($hafuman_arr[$id]['l'] == 0 && $hafuman_arr[$id]['r'] == 0) {
            $str .= $hafuman_arr[$id]['value'];
            $id = $hafuman_arr_root;
        }
    }

    // Bits that end in the middle of a code are incomplete and are dropped
    // rather than being padded into an extra, spurious symbol.
    return $str;
}

/**
 * Find the index of the root node (the node whose 'parent' is 0) in a
 * Huffman array.
 *
 * The last element is checked first; if it is not the root, the whole array
 * is scanned.
 *
 * @param array $hafuman_arr List of nodes, each with at least a 'parent' entry.
 * @return int|string Index of the root node, or '' when the array is empty or
 *                    no node has parent 0.
 */
function hafuman_arr_root($hafuman_arr){
    if (empty($hafuman_arr)) {
        return '';
    }

    // Fast path: check the last slot before scanning.
    $last = count($hafuman_arr) - 1;
    if (isset($hafuman_arr[$last]) && $hafuman_arr[$last]['parent'] == 0) {
        return $last;
    }

    // Fallback: scan every node. "Not found" is only reported after the loop,
    // otherwise the scan would stop at the first element.
    foreach ($hafuman_arr as $k => $v) {
        if ($v['parent'] == 0) {
            return $k;
        }
    }

    return '';
}

/**
 * Count how often each byte occurs in a string.
 *
 * The string is read byte by byte (strlen() / $str[$i]); entries are returned
 * in order of first appearance.
 *
 * @param string $str Text to analyse.
 * @return array List of ['key' => one-byte symbol, 'count' => occurrences];
 *               empty array for empty input.
 */
function str_num($str){
    // Compare against '' explicitly: a truthiness test (!$str) would also
    // discard the valid input "0".
    if (!is_scalar($str) || (string)$str === '') {
        return [];
    }
    $str = (string)$str;

    $strRecord = [];  // result list, in first-seen order
    $position = [];   // symbol => index into $strRecord, for O(1) lookup

    $len = strlen($str);
    for ($i = 0; $i < $len; $i++) {
        $symbol = $str[$i];

        if (isset($position[$symbol])) {
            // Seen before: bump its counter.
            $strRecord[$position[$symbol]]['count'] += 1;
        } else {
            // First occurrence: remember where its record lives.
            $position[$symbol] = count($strRecord);
            $strRecord[] = ['key' => $symbol, 'count' => 1];
        }
    }

    return $strRecord;
}

/**
 * Build the array-backed Huffman tree for a weight table.
 *
 * For n symbols the result has 2n-1 nodes: indices 0..n-1 are the leaves (in
 * the order of $weight_arr), indices n..2n-2 are the merged nodes in creation
 * order, so the last node is the root. Each node is
 * ['value' => symbol(s), 'parent' => index, 'l' => index, 'r' => index];
 * 0 in 'parent' means "no parent", 0 in both 'l' and 'r' means "leaf".
 *
 * On every round the two lowest-count entries are removed (the first minimum
 * becomes the left child, the second the right child) and an entry whose key
 * is the concatenation of both keys is appended.
 *
 * @param array $weight_arr List of ['key' => symbol, 'count' => occurrences].
 * @return array The Huffman array described above; empty for an empty table.
 */
function hafuman_arr($weight_arr){
    $weight_arr = array_values($weight_arr);  // make sure entries are indexed 0..n-1
    $weight_count = count($weight_arr);       // number of leaves
    $lan = $weight_count * 2 - 1;             // total number of nodes

    // Pre-allocate every node. Only the first $weight_count slots have a
    // weight entry to copy from; merged-node slots start with a null value and
    // get their key when they are created below.
    $hafuman_arr = [];
    for ($i = 0; $i < $lan; $i++) {
        $hafuman_arr[$i] = [
            'value' => $i < $weight_count ? $weight_arr[$i]['key'] : null,
            'parent' => 0,
            'l' => 0,
            'r' => 0,
        ];
    }

    $start_key = $weight_count; // index of the next merged node to fill
    while (count($weight_arr) > 1) {
        // Take out the lowest-count entry ...
        $min_key_1 = min_arr($weight_arr);
        $min_1 = $weight_arr[$min_key_1];
        unset($weight_arr[$min_key_1]);
        $weight_arr = array_values($weight_arr); // min_arr() needs keys 0..n-1

        // ... and the lowest of what is left.
        $min_key_2 = min_arr($weight_arr);
        $min_2 = $weight_arr[$min_key_2];
        unset($weight_arr[$min_key_2]);

        // Put the merged entry back at the end of the working list.
        $new_key = $min_1['key'] . $min_2['key'];
        $weight_arr[] = ['key' => $new_key, 'count' => $min_1['count'] + $min_2['count']];
        $weight_arr = array_values($weight_arr);

        // Resolve each child once, then link parent and children.
        $left = get_hafuman_order($hafuman_arr, $min_1['key']);
        $right = get_hafuman_order($hafuman_arr, $min_2['key']);

        $hafuman_arr[$start_key]['value'] = $new_key;
        $hafuman_arr[$start_key]['l'] = $left;
        $hafuman_arr[$start_key]['r'] = $right;
        $hafuman_arr[$left]['parent'] = $start_key;
        $hafuman_arr[$right]['parent'] = $start_key;

        $start_key += 1;
    }

    return $hafuman_arr;
}

/**
 * Build the symbol => Huffman code table from a Huffman array.
 *
 * Every leaf that has a parent is followed upwards to the root; each step
 * contributes one bit via get_0_1(). Bits are collected root-first, so the
 * finished string reads from the root down to the leaf.
 *
 * @param array $arr Huffman array (nodes with 'value', 'parent', 'l', 'r').
 * @return array Map of leaf value => code string made of '0'/'1'.
 */
function str_code_arr($arr)
{
    // Upper bound on the number of upward steps taken for one leaf.
    $max_steps = (count($arr) + 1) / 2 - 1;
    $code_table = [];

    foreach ($arr as $leaf_id => $node) {
        // Skip anything that is not a leaf attached to a parent.
        if ($node['l'] != 0 || $node['r'] != 0 || $node['parent'] == 0) {
            continue;
        }

        $code = '';
        $child = $leaf_id;
        $ancestor = $node['parent'];
        for ($step = 0; $step < $max_steps && $ancestor; $step++) {
            $edge = get_0_1($child, $arr[$ancestor]);
            // Prepend: the bit nearest the root must end up first.
            $code = $edge['code'] . $code;
            $child = $ancestor;
            $ancestor = $edge['parent'];
        }

        $code_table[$node['value']] = $code;
    }

    return $code_table;
}

/**
 * Return the index of the entry with the smallest 'count'.
 *
 * The array is read by position 0..count-1; on ties the earliest index wins.
 *
 * @param array $arr Non-empty, 0-indexed list of entries with a 'count' field.
 * @return int Index of the minimum entry.
 */
function min_arr($arr){
    $size = count($arr);
    $lowest_index = 0;
    $lowest_count = $arr[0]['count'];

    $cursor = 1;
    while ($cursor < $size) {
        $candidate = $arr[$cursor]['count'];
        // Strictly smaller only, so the first of several equal minima is kept.
        if ($candidate < $lowest_count) {
            $lowest_count = $candidate;
            $lowest_index = $cursor;
        }
        $cursor++;
    }

    return $lowest_index;
}

/**
 * Return the index of the node whose 'value' equals $str.
 *
 * Values are compared as strings. A loose == would treat numeric strings such
 * as '1' and '01' as equal and return the wrong node for text that contains
 * digits.
 *
 * @param array  $hafuman_arr Huffman array (nodes with a 'value' entry).
 * @param string $str         Value to look for.
 * @return int|string|null Index of the first matching node, or null if none matches.
 */
function get_hafuman_order($hafuman_arr,$str){
    $needle = (string)$str;
    foreach ($hafuman_arr as $k => $v) {
        // isset() also skips nodes whose value is still null (not filled in yet).
        if (isset($v['value']) && (string)$v['value'] === $needle) {
            return $k;
        }
    }
    return null;
}

/**
 * Describe the edge between a node and one of its children.
 *
 * @param int   $id  Index of the child node.
 * @param array $arr The parent node (with 'parent' and 'l' entries).
 * @return array ['parent' => the parent node's own parent index,
 *                'code'   => 0 if $id is strictly identical to the node's 'l', else 1].
 */
function get_0_1($id,$arr){
    return [
        'parent' => $arr['parent'],
        'code' => ($arr['l'] === $id) ? 0 : 1,
    ];
}


  • 0
    点赞
  • 0
    收藏
    觉得还不错? 一键收藏
  • 0
    评论
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值