第十七届全国大学生信息安全竞赛（初赛）Crypto部分hash题解_十七届全国大学生信息安全竞赛题解-CSDN博客

本文链接：https://blog.csdn.net/althumi/article/details/139077709

文章目录

题目描述
解题思路

题目描述

你能仅仅通过一个Python2.7自带的hash函数的输出，计算出它的原象的sha384哈希值吗？
附件在github上就可以下载
下载链接 hash

解题思路

将压缩包解压后，含有两个文件，分别是 hash.py 、 output.txt 。

hash.py

#!/usr/bin/python2
# Python 2.7 (64-bit version)
# Challenge generator: prints the built-in hash() of a random 7-byte key, then
# the flag XOR-masked with the SHA-384 digest of the key's hex representation.
from secret import flag
import os, binascii, hashlib
# Seven random bytes; this is the preimage the solver has to recover.
key = os.urandom(7)
# Output line 1: hash() of the raw key bytes.
print hash(key)
# Output line 2: int(sha384(hexlify(key)), 16) XOR int(hexlify(flag), 16).
# Note the digest is taken over the hex text of the key, not the raw bytes.
print int(hashlib.sha384(binascii.hexlify(key)).hexdigest(), 16) ^ int(binascii.hexlify(flag), 16)

output.txt

7457312583301101235
13903983817893117249931704406959869971132956255130487015289848690577655239262013033618370827749581909492660806312017

output.txt 的两行数据分别是 hash.py 中的两行输出。

根据源代码逻辑，可以知道首要问题是如何由 $key$ 的哈希值（第一行输出）还原出 $key$ 的原文。

通过查询 python2.7 的内置 hash 函数，可以搜索到相关信息：

python3 中的 hash 函数相对于 python2 ，不同在于 python3 默认开启哈希随机化，计算字符串哈希时会混入随机的 prefix 和 suffix ；而 python2 默认不开启随机化，此时 prefix 和 suffix 均为 0 ，相当于没有添加。
通过 github 上 python2.7 开源代码，找到 python2.7 中 str 类型的 hash 计算源码，部分代码如下：

/*
 * Compute (and cache on the object) the hash of a string object.
 *
 * Returns the value stored in a->ob_shash when that field is not -1;
 * otherwise folds every byte of the string into a multiplicative/XOR
 * accumulator, stores the result in a->ob_shash and returns it.
 */
static long
string_hash(PyStringObject *a)
{
    register Py_ssize_t len;
    register unsigned char *p;
    register long x;

#ifdef Py_DEBUG
    assert(_Py_HashSecret_Initialized);
#endif
    /* A value other than -1 means the hash was already computed: reuse it. */
    if (a->ob_shash != -1)
        return a->ob_shash;
    len = Py_SIZE(a);
    /*
      We make the hash of the empty string be 0, rather than using
      (prefix ^ suffix), since this slightly obfuscates the hash secret
    */
    if (len == 0) {
        a->ob_shash = 0;
        return 0;
    }
    /* Bytes are read as unsigned char, so each one contributes 0..255. */
    p = (unsigned char *) a->ob_sval;
    /* NOTE(review): prefix/suffix are presumably 0 unless hash randomization
       is enabled -- confirm against the interpreter configuration. */
    x = _Py_HashSecret.prefix;
    /* Seed the accumulator with the first byte shifted left by 7 bits. */
    x ^= *p << 7;
    /* One round per byte (the first byte is consumed again here):
       multiply by 1000003, then XOR in the byte. */
    while (--len >= 0)
        x = (1000003*x) ^ *p++;
    /* Mix in the string length, then the secret suffix. */
    x ^= Py_SIZE(a);
    x ^= _Py_HashSecret.suffix;
    /* -1 is the "not cached" marker tested above, so it is never returned
       as a hash value; it is remapped to -2. */
    if (x == -1)
        x = -2;
    a->ob_shash = x;
    return x;
}

将其逻辑再编写成一个简易的函数方便测试，代码如下：

// Simplified model of the Python 2.7 string hash with prefix and suffix taken as 0.
//
// s:   pointer to the raw bytes to hash (may contain any byte value 0..255).
// len: number of bytes to hash.
// Returns the 64-bit hash; 0 for an empty input, and never -1 (remapped to -2),
// matching the reference implementation.
ll h(char *s, ll len) {
    // The reference defines the hash of the empty string as 0 and never
    // dereferences the buffer in that case.
    if (len <= 0) {
        return 0;
    }

    // Read bytes as unsigned: with a signed plain char, bytes >= 0x80 would be
    // sign-extended and corrupt the upper bits of the accumulator.
    const unsigned char *bytes = reinterpret_cast<const unsigned char *>(s);

    // Unsigned arithmetic gives the intended wrap-around modulo 2^64.
    unsigned long long acc = static_cast<unsigned long long>(bytes[0]) << 7;
    for (ll i = 0; i < len; ++i) {
        acc = (acc * 1000003ull) ^ bytes[i];
    }
    acc ^= static_cast<unsigned long long>(len);

    ll res = static_cast<ll>(acc);
    // -1 is reserved by the reference implementation as "hash not computed".
    if (res == -1) {
        res = -2;
    }
    return res;
}

现在寻找逆向算法，关键在于对每一轮的表达式
$res = (res^{'} \times 1000003) \oplus s_i$
由已知的 $res$ 解出上一轮的状态 $res^{'}$ 和字符 $s_i$ 。其中存在两个未知数，无法直接解出，所以我们需要穷举 $s_i$ 。

但是直接穷举 $s_1\cdots s_7$ 显然不可行： $s_i \in [0, 255]$ ，每个字符有 $256$ 种取值，直接穷举需要 $256^7 = 2^{56}$ 次。

注意到算法中有两个已知量：最终结果 $res = 7457312583301101235$ ，以及初始状态 $res = 0$ 。因此可以采用中间相遇的思路：先枚举后三个字符 $s_7s_6s_5$ ，从最终结果逆推三轮，得到结果集 $A=\{res_4 \mid (s_5,s_6,s_7) \to res_4\}$ 并存储到哈希表中；再枚举前四个字符 $s_1s_2s_3s_4$ ，从初始状态正推四轮，得到结果集 $B=\{res_4 \mid (s_1,s_2,s_3,s_4) \to res_4\}$ 。对于结果集 $B$ 中的每个 $res_4$ ，查询结果集 $A$ 中是否存在相等的值，存在则将两边的字符拼在一起就是 key 的原文。这样总枚举量约为 $256^3 + 256^4$ 次。

解密代码如下：

// Meet-in-the-middle inversion of the simplified Python 2.7 string hash for a
// 7-byte key.
//
// d: the observed hash value, as an unsigned 64-bit pattern.
// Returns the seven key bytes in order (first byte first), or an empty vector
// when no 7-byte preimage exists for d.
//
// Uses names declared elsewhere in the file: `v` (1000003), `iv` (the inverse
// of 1000003 modulo 2^64) and `Hashmap`.
// NOTE(review): Hashmap is assumed to be a std map-like container from ull to
// vector<int> -- confirm against its declaration.
vector<unsigned> uh(ull d) {
    // Entries left over from an earlier call belong to a different target and
    // could produce false matches.
    Hashmap.clear();

    // Undo the final `x ^= length` step; the key length is fixed at 7.
    const ull target = d ^ 7;

    // Backward half: for every choice of the last three bytes, peel off three
    // rounds (XOR the byte out, multiply by the inverse) and remember which
    // bytes lead to each intermediate state after four rounds.
    for (ull i7 = 0; i7 < 256u; ++i7) {
        const ull res_6 = (target ^ i7) * iv;
        for (ull i6 = 0; i6 < 256u; ++i6) {
            const ull res_5 = (res_6 ^ i6) * iv;
            for (ull i5 = 0; i5 < 256u; ++i5) {
                const ull res_4 = (res_5 ^ i5) * iv;
                Hashmap[res_4] = vector<int>{static_cast<int>(i7),
                                             static_cast<int>(i6),
                                             static_cast<int>(i5)};
            }
        }
    }

    // Forward half: run four rounds from the initial state for every choice of
    // the first four bytes and look for a matching intermediate state.
    for (ull i1 = 0; i1 < 256u; ++i1) {
        // Initial state is (first byte << 7); the first round consumes it again.
        const ull res_1 = ((i1 << 7) * v) ^ i1;
        for (ull i2 = 0; i2 < 256u; ++i2) {
            const ull res_2 = (res_1 * v) ^ i2;
            for (ull i3 = 0; i3 < 256u; ++i3) {
                const ull res_3 = (res_2 * v) ^ i3;
                for (ull i4 = 0; i4 < 256u; ++i4) {
                    const ull res_4 = (res_3 * v) ^ i4;
                    const auto hit = Hashmap.find(res_4);
                    if (hit == Hashmap.end()) {
                        continue;
                    }
                    // The tail was stored as {byte7, byte6, byte5}; emit it
                    // reversed so the key comes out in order.
                    const auto &tail = hit->second;
                    return vector<unsigned>{static_cast<unsigned>(i1),
                                            static_cast<unsigned>(i2),
                                            static_cast<unsigned>(i3),
                                            static_cast<unsigned>(i4),
                                            static_cast<unsigned>(tail[2]),
                                            static_cast<unsigned>(tail[1]),
                                            static_cast<unsigned>(tail[0])};
                }
            }
        }
    }

    // No 7-byte preimage found.
    return {};
}

具体公式推导涉及同余方程、逆元等知识。

代码运行完毕，得到 key 原文各字节的值（十进制）为 93 140 240 63 90 8 82

# Recover the flag from the second output line once the key is known.
# Runs unchanged on Python 2.7 and Python 3.
import binascii
import hashlib

# The 7 key bytes recovered by the meet-in-the-middle search.
key = b']\x8c\xf0?Z\x08R'
# The mask is SHA-384 over the hex text of the key, read as a big integer.
pre = int(hashlib.sha384(binascii.hexlify(key)).hexdigest(), 16)
# Second line of output.txt.
ans = 13903983817893117249931704406959869971132956255130487015289848690577655239262013033618370827749581909492660806312017
# XOR with the mask yields the flag as an integer.
flag_int = ans ^ pre
# Hex form, as printed by the original script.
flag_hex = hex(flag_int)
print(flag_hex)
# Decode the integer back to bytes; '%x' avoids the '0x' prefix and the
# trailing 'L' that Python 2 adds, and unhexlify needs an even digit count.
digits = '%x' % flag_int
if len(digits) % 2:
    digits = '0' + digits
flag = binascii.unhexlify(digits)
print(flag)