要求
已知有一批形如<店名,城市>的键值对,我们现在的需求是按城市统计各店铺的分布。数据的格式如下:
我们希望输出数据的格式如下所示
所有的数据都是以txt文件存储的。
代码如下
# from collections import Counter
# from pprint import pprint
import os
import csv
import codecs
def getNum(l1):
    """Count how many times each item occurs in ``l1``.

    Args:
        l1: Iterable of hashable items (for example, store names).

    Returns:
        A plain ``dict`` mapping every distinct item to its number of
        occurrences. Keys appear in the order the items were first seen.
    """
    counts = {}
    for item in l1:
        # dict.get folds the "first time seen" and "seen before" cases
        # into a single lookup.
        counts[item] = counts.get(item, 0) + 1
    return counts
def main(in_path="pos.txt", out_path="params.txt"):
    """Write, for every city, how many stores each brand has there.

    Each input line is expected to look like ``store name(branch)<TAB>city``.
    The brand is the text before the first ``(`` of the first tab-separated
    field (so ``xx(xx road branch)`` counts as ``xx``); the city is the
    second tab-separated field.

    Output format, one block per city in first-seen order::

        <city>
        <brand> <count>
        <brand> <count>

    Within a city, brands are listed by ascending count; brands with the same
    count keep the order in which they first appeared.

    Args:
        in_path: UTF-8 text file to read. Defaults to ``"pos.txt"``.
        out_path: File to (over)write with the result, encoded as UTF-8 so
            the output does not depend on the platform's default encoding.
            Defaults to ``"params.txt"``.

    Lines that contain no tab separator (including blank lines) carry no city
    and are skipped instead of aborting the run.
    """
    # city -> {brand: number of stores}
    per_city = {}
    with open(in_path, "r", encoding="utf-8") as src:
        for line in src:
            fields = line.rstrip("\r\n").split("\t")
            if len(fields) < 2:
                continue
            brand = fields[0].split("(")[0]
            counts = per_city.setdefault(fields[1], {})
            counts[brand] = counts.get(brand, 0) + 1

    with open(out_path, "w", encoding="utf-8") as out:
        for city, counts in per_city.items():
            out.write(city + "\n")
            # sorted() is stable, so ties stay in first-seen order.
            for brand in sorted(counts, key=counts.__getitem__):
                out.write("{} {}\n".format(brand, counts[brand]))
def main1(in_path="pos.txt", out_path="params.txt"):
    """Write how many stores each brand has nationwide, ignoring the city.

    Each input line is expected to look like ``store name(branch)<TAB>city``.
    The brand is the first tab-separated field cut at its first ``(``, so the
    city never becomes part of the key, even for names without a branch
    suffix. A line with no tab is treated as a bare store name.

    Output is one ``<brand> <count>`` line per brand, by ascending count;
    brands with the same count keep the order in which they first appeared.

    Args:
        in_path: UTF-8 text file to read. Defaults to ``"pos.txt"``.
        out_path: File to (over)write with the result, encoded as UTF-8.
            Defaults to ``"params.txt"``.

    Lines that yield an empty brand (for example blank lines) are skipped.
    """
    counts = {}
    with open(in_path, "r", encoding="utf-8") as src:
        for line in src:
            # Drop the city column first, then the "(branch)" suffix.
            store = line.rstrip("\r\n").split("\t", 1)[0]
            brand = store.split("(", 1)[0]
            if not brand:
                continue
            counts[brand] = counts.get(brand, 0) + 1

    with open(out_path, "w", encoding="utf-8") as out:
        # sorted() is stable, so ties stay in first-seen order.
        for brand in sorted(counts, key=counts.__getitem__):
            out.write("{} {}\n".format(brand, counts[brand]))
if __name__ == '__main__':
    # Default run: per-city brand counts. Call main1() here instead to get
    # the nationwide tally that ignores cities; both write to params.txt.
    main()