import os
import json
from collections import Counter
# Count how often each individual character occurs in the transcriptions.
label_dir = "/Users/thy/Downloads/chinese20240613"


def parse_label_line(line: str) -> list:
    """Return the transcription strings stored in one label-file line.

    The line is split on tabs; the second field must be a JSON list whose
    items each carry a "transcription" key. The first field is ignored.

    Raises:
        IndexError: if the line has no tab-separated second field.
        json.JSONDecodeError: if the second field is not valid JSON.
        KeyError: if an item lacks the "transcription" key.
    """
    fields = line.strip("\r\n").split("\t")
    return [item["transcription"] for item in json.loads(fields[1])]


def split_characters(transcriptions) -> list:
    """Flatten an iterable of strings into a list of single characters.

    Empty strings contribute nothing; one-character strings contribute
    themselves, so no special case is needed for them.
    """
    return [char for text in transcriptions for char in text]


if __name__ == "__main__":
    zi_ls = []
    # Explicit encoding so the text decodes the same way on every platform.
    # NOTE(review): assumes Label.txt is UTF-8 encoded - confirm.
    with open(os.path.join(label_dir, "Label.txt"), encoding="utf-8") as f:
        for line in f:
            if not line.strip():
                # A blank (e.g. trailing) line has no JSON field to parse.
                continue
            transcriptions = parse_label_line(line)
            print(transcriptions)
            zi_ls.extend(split_characters(transcriptions))

    char_counts = Counter(zi_ls)
    # Report every character with its count, then dump the Counter itself;
    # char_counts can be used directly wherever a dict of counts is needed.
    for char, count in char_counts.items():
        print(f"字符 '{char}' 出现了 {count} 次")
    print(char_counts)
# Occurrence counts (the script below tallies whole transcription strings)
import os
import json
from collections import Counter
# Count how often each whole transcription string occurs (not per character).
label_dir = "/Users/thy/Downloads/chinese20240613"


def transcriptions_in_label_line(line: str) -> list:
    """Return the transcription strings stored in one label-file line.

    The line is split on tabs; the second field must be a JSON list whose
    items each carry a "transcription" key. The first field is ignored.

    Raises:
        IndexError: if the line has no tab-separated second field.
        json.JSONDecodeError: if the second field is not valid JSON.
        KeyError: if an item lacks the "transcription" key.
    """
    fields = line.strip("\r\n").split("\t")
    return [item["transcription"] for item in json.loads(fields[1])]


if __name__ == "__main__":
    zi_ls = []
    # Explicit encoding so the text decodes the same way on every platform.
    # NOTE(review): assumes Label.txt is UTF-8 encoded - confirm.
    with open(os.path.join(label_dir, "Label.txt"), encoding="utf-8") as f:
        for line in f:
            if not line.strip():
                # A blank (e.g. trailing) line has no JSON field to parse.
                continue
            transcriptions1 = transcriptions_in_label_line(line)
            print(transcriptions1)
            zi_ls.extend(transcriptions1)

    # NOTE(review): the original layout was whitespace-mangled; this summary
    # is reconstructed as running once after the whole file is read - confirm.
    print("出现的字符串:", set(zi_ls))

    char_counts = Counter(zi_ls)
    # Report every distinct string with its count, then dump the Counter;
    # char_counts can be used directly wherever a dict of counts is needed.
    for char, count in char_counts.items():
        print(f"字符 '{char}' 出现了 {count} 次")
    print(char_counts)