复现论文《Personal Information in Passwords and Its Security Implications》的部分实验

论文:

    题目:Personal Information in Passwords and Its Security Implications

    链接:https://ieeexplore.ieee.org/abstract/document/7931642

实验重现:

# encoding: utf-8
# author: kaiyouhu


import pandas as pd
import numpy as np
import re


class Passenger:
    """Plain record holding the fields of a single account entry.

    Every constructor argument is stored unchanged on the instance under an
    attribute of the same name.
    """

    def __init__(self, login_email, password, name, id_number, username, phone, email):
        # Login credentials.
        self.login_email, self.password = login_email, password
        # Identity details.
        self.name, self.id_number, self.username = name, id_number, username
        # Contact details.
        self.phone, self.email = phone, email


# Number of '----'-separated fields in a complete record. Field positions are
# assumed to be: 0 login e-mail, 1 password, 2 name, 3 ID number, 4 username,
# 5 phone, 6 e-mail (the Passenger constructor order) -- TODO confirm against
# the actual data file.
_FIELD_COUNT = 7


def _load_records(path, encoding=None):
    """Read *path* and return one list of at least 7 fields per non-blank line."""
    with open(path, encoding=encoding) as f:
        lines = f.read().splitlines()

    records = []
    for line in lines:
        if not line.strip():
            # Blank lines carry no record and must not inflate the total.
            continue
        fields = line.split('----')
        # Pad short records so later positional access cannot raise IndexError.
        fields.extend([''] * (_FIELD_COUNT - len(fields)))
        records.append(fields)
    return records


def _summary_table(header, labels, amounts, total):
    """Build a rank / label / amount / percentage DataFrame with typed columns."""
    # Guard against an empty data set instead of dividing by zero.
    percentages = [amount / total if total else 0.0 for amount in amounts]
    return pd.DataFrame(
        {
            header[0]: list(range(1, len(labels) + 1)),
            header[1]: labels,
            header[2]: amounts,
            header[3]: percentages,
        },
        columns=header,
    )


def _personal_info_needles(record):
    """Map each personal-information type to the string searched for in the password."""
    id_number = record[3]
    return {
        # Characters 6..13 of the ID number are used as the birthdate; skip
        # ID numbers too short to contain the full 8-character slice.
        'Birthdate': id_number[6:14] if len(id_number) >= 14 else '',
        'AccountName': record[4],
        # NOTE(review): literal match on the name field only; names stored in a
        # non-Latin script will rarely occur in a password, and no
        # romanization is attempted here.
        'Name': record[2],
        # Local part (before '@') of the login e-mail.
        'Email': record[0].split('@')[0],
        'IDNumber': id_number,
        'CellPhone': record[5],
    }


def read_data(path, encoding=None):
    """Print three summary tables for a '----'-delimited credential dump.

    The file at *path* is expected to hold one record per line, with fields
    separated by ``'----'``. The function prints, in order:

    1. the number of non-blank records (``sum: N``);
    2. how often each of ten fixed passwords occurs;
    3. how many passwords match each of ten digit/letter structures;
    4. how many passwords contain each kind of personal information taken
       from the same record.

    Args:
        path: Path of the text file to analyse.
        encoding: Text encoding passed to ``open``; ``None`` keeps the
            platform default.

    Returns:
        None. All results are written to standard output.
    """
    records = _load_records(path, encoding)
    total = len(records)
    print('sum: ' + str(total))

    # The same header is reused for all three tables, so the second column is
    # labelled 'Password' even when it holds a structure or information type.
    table_header = ['RANK', 'Password', 'Amount', 'Percentage']
    passwords = [record[1] for record in records]

    # Table 1: occurrences of fixed, well-known passwords.
    password_list = ['123456', 'a123456', '123456a', '5201314', '111111',
                     'woaini1314', 'qq123456', '123123', '000000', '1qaz2wsx']
    amounts = [passwords.count(candidate) for candidate in password_list]
    print(_summary_table(table_header, password_list, amounts, total))

    # Table 2: passwords matching a structure (D = digits, L = letters).
    structure_list = ['D7', 'D8', 'D6', 'L2D7', 'L3D6', 'L1D7', 'L2D6', 'L3D7', 'D9', 'L2D8']
    # Raw strings avoid invalid-escape warnings; compiled once, outside the loop.
    structure_patterns = [
        re.compile(pattern) for pattern in (
            r'^\d{7}$', r'^\d{8}$', r'^\d{6}$', r'^[a-zA-Z]{2}\d{7}$', r'^[a-zA-Z]{3}\d{6}$',
            r'^[a-zA-Z]{1}\d{7}$', r'^[a-zA-Z]{2}\d{6}$', r'^[a-zA-Z]{3}\d{7}$',
            r'^\d{9}$', r'^[a-zA-Z]{2}\d{8}$',
        )
    ]
    amounts = [
        sum(1 for password in passwords if pattern.match(password))
        for pattern in structure_patterns
    ]
    print(_summary_table(table_header, structure_list, amounts, total))

    # Table 3: passwords that contain a piece of the owner's personal data.
    information_type_list = ['Birthdate', 'AccountName', 'Name', 'Email', 'IDNumber', 'CellPhone']
    counts = dict.fromkeys(information_type_list, 0)
    for record in records:
        password = record[1]
        for information_type, needle in _personal_info_needles(record).items():
            # An empty needle is "contained" in every string; never count it.
            if needle and needle in password:
                counts[information_type] += 1
    amounts = [counts[information_type] for information_type in information_type_list]
    print(_summary_table(table_header, information_type_list, amounts, total))


if __name__ == '__main__':
    # Run the analysis only when executed as a script, so that importing this
    # module does not try to read the data file.
    read_data('../data/12306.txt')

输出结果:

sum: 131653
  RANK    Password Amount             Percentage
0    1      123456    392  0.0029775242493524645
1    2     a123456    281  0.0021343987603776593
2    3     123456a    165  0.0012532946457733587
3    4     5201314    161  0.0012229117452697623
4    5      111111    157  0.0011925288447661656
5    6  woaini1314    136  0.0010330186171222835
6    7    qq123456     98  0.0007443810623381161
7    8      123123     98  0.0007443810623381161
8    9      000000     97  0.0007367853372122169
9   10    1qaz2wsx     93  0.0007064024367086204
  RANK Password Amount            Percentage
0    1       D7  10906   0.08283897822305607
1    2       D8   9458    0.0718403682407541
2    3       D6   9102   0.06913629009593401
3    4     L2D7   5073  0.038533113563686355
4    5     L3D6   4832  0.036702543808344666
5    6     L1D7   4778   0.03629237465154611
6    7     L2D6   4275   0.03247172491321884
7    8     L3D7   3885  0.029509392114118176
8    9       D9   3594  0.027299036102481522
9   10     L2D8   3371  0.025605189399406016
  RANK     Password Amount             Percentage
0    1    Birthdate   5726     0.0434931220708985
1    2  AccountName   2565   0.019483034947931303
2    3         Name   2565   0.019483034947931303
3    4        Email   3979   0.030223390275952694
4    5     IDNumber   6835    0.05191678123552065
5    6    CellPhone     89  0.0006760195362050238

Process finished with exit code 0

备注:

    1. 实验数据是我自己在百度网盘上找到并下载的,是一个约 14 MB 的 txt 文件,共 131653 条数据(不同版本的条数可能略有出入,但基本一致)。

    2. 只重现到第三个表格:前两个表格的数据与论文基本一致;第三个表格我不太理解,也不清楚作者具体是怎么实现的(匹配细节未知);后面的表格我懒得继续编写输出了。

substring ← get_all_substring(pwd)
reverse_length_sort(substring)
for eachstring ∈ substring do
    if len(eachstring) ≥ 2 then
        if matchbd(eachstring, infolist) then

按照我对作者伪代码的理解,思路是:先获取密码中所有长度大于等于 2 的子串,然后拿这些子串去和身份证号、电话号码等信息逐一匹配,我就呵呵了。

    3. 论文的内容看看就好,个人感觉实验过程有点水,结论得出得太草率(本人愚钝之见,不要太在意)。

评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值