背景说明:
name sex age
sakura f 17
yuki f 16
naka f 16
suzumi m 17
希望以age为第一key、sex为第二key统计学生数目,得到如下数据:
17 f 1
17 m 1
16 f 2
其实此数据可以通过字典表达,即为
{
17: {
f: 1,
m: 1
},
16: {
f: 2
}
}
python代码实现:
名为test.txt的文本内容如下
name sex age
sakura f 17
yuki f 16
naka f 16
suzumi m 17
code
#coding=utf-8
import os
import pprint
# Walk (creating levels as needed) down the nested dicts, then bump the leaf counter.
def update_dict(d, keys):
    """
    Increment the counter stored in ``d`` under the nested path ``keys``.

    :param d: dict to update in place
    :param keys: list of keys; every key but the last selects (or creates)
        a nested dict, the last key names the counter to increment
    :return: None, ``d`` is modified in place
    """
    if not isinstance(d, dict):
        raise Exception("para d must be a dict!")
    if not isinstance(keys, list):
        raise Exception("para keys must be a list!")

    leaf = keys[-1]  # an empty key list fails here with IndexError
    node = d
    for key in keys[:-1]:
        # A level that does not exist yet starts out as an empty dict.
        node = node.setdefault(key, {})
        if not isinstance(node, dict):
            raise Exception("para d must be a dict!")
    # The first occurrence counts as 1, later ones add to the stored count.
    node[leaf] = node.get(leaf, 0) + 1
def static_base_keys_index(lines, keys_index):
    """
    Count rows grouped by the columns selected through ``keys_index``.

    :param lines: list of raw text lines; each line is split on whitespace here
    :param keys_index: column indexes used as grouping keys, outermost level
        first; any order is allowed and an index may be repeated
    :return: nested dict of counts with one nesting level per entry of
        ``keys_index`` (a flat dict when a single index is given)
    :raises IndexError: if a non-blank line has fewer columns than an index
        in ``keys_index`` requires

    Errors are no longer re-wrapped in a bare ``Exception``; they propagate
    with their original type and traceback (all are ``Exception`` subclasses).
    """
    ret = {}
    for line in lines:
        # split() without arguments already ignores leading/trailing whitespace.
        elements = line.split()
        if not elements:
            # A blank line carries no record; indexing it would raise IndexError.
            continue
        keys = [elements[i] for i in keys_index]
        update_dict(ret, keys)  # updates ret in place
    return ret
def read_file_2_lines(file, with_head=False):
    """
    Read a text file and return its content as a list of lines.

    :param file: path of the (log) file to read
    :param with_head: set to True when the first line is a header row;
        that line is then dropped from the result
    :return: list of lines without newline characters; an empty list when
        the file is empty or contains only whitespace
    :raises Exception: if ``file`` does not exist
    """
    if not os.path.exists(file):
        raise Exception("file: %s is not exists" % file)
    with open(file, "r") as f:
        content = f.read().strip()
    # "".split("\n") yields [""], which would look like one (empty) record,
    # so an empty file is mapped to an empty list explicitly.
    lines = content.split("\n") if content else []
    if with_head:
        lines = lines[1:]
    return lines
def pretty_show(d, keys_str=""):
    """
    Render a nested count dict as tab-separated text, one line per leaf.

    Each output line lists the keys on the path to a leaf followed by the
    repr of the leaf value, e.g. ``{'17': {'m': 1}}`` gives ``"17\\tm\\t1\\n"``.

    :param d: the statistics result (nested dict), or a leaf value during
        recursion
    :param keys_str: tab-prefixed path of keys accumulated so far; callers
        should leave it at its default
    :return: the formatted text; an empty string for an empty dict
    """
    if not isinstance(d, dict):
        # The assembled line always begins with a tab separator (from
        # keys_str, or the one added here when keys_str is empty); [1:]
        # removes it. repr() replaces the Python-2-only backtick syntax.
        return (keys_str + "\t" + repr(d) + "\n")[1:]
    # %s formatting lets non-string keys (e.g. ints) be rendered as well.
    return "".join(
        pretty_show(value, "%s\t%s" % (keys_str, key))
        for key, value in d.items()
    )
if __name__ == "__main__":
    # test.txt starts with a header row, which is dropped before counting.
    lines = read_file_2_lines("test.txt", with_head=True)
    # keys_index holds 0-based column indexes of "name sex age": 2 is age and
    # 1 is sex, so rows are grouped by age first and by sex second.
    static = static_base_keys_index(lines, keys_index=[2, 1])
    # Single-argument print(...) behaves the same on Python 2 and Python 3.
    print(static)
    print(pretty_show(static).strip())
结果:
完美
print static
{'17': {'m': 1, 'f': 1}, '16': {'f': 2}}
print pretty_show(static).strip()
17 m 1
17 f 1
16 f 2
如果我们令keys_index=[1,2],则可以得到如下结果:
{'m': {'17': 1}, 'f': {'17': 1, '16': 2}}
或者只根据age统计,令keys_index=[2]:
{'17': 2, '16': 2}
只根据sex统计,令keys_index=[1]:
{'m': 1, 'f': 3}