【办公类-19-01-04】统计孩子中2班名字的同音字（读音、汉字）-CSDN博客

本文链接：https://blog.csdn.net/reasonsummer/article/details/141796519

背景需求：

开学第一天，听搭档和阿姨叫孩子的名字，感觉孩子中间有很多同音字。

为了更好地掌握重复率，我用以前写的几段代码，再次统计班级幼儿的姓氏同字率、姓氏同音率、名字同字率、名字同音率。

【办公类-19-01-03】办公中的思考——Python,统计孩子名字的同音字（拼音）_python名称拼音相似的-CSDN博客（文章浏览阅读667次）。文章讲述了中班班主任运用Python进行姓名拼音分析，以帮助记忆孩子们的名字，特别是处理同姓和同音字的情况。通过将名字转为拼音并进行词频分析，教师能更准确地区分和记忆孩子们的名字，同时这种活动也促进了孩子们对名字和拼音的认识。https://blog.csdn.net/reasonsummer/article/details/129627144

【教学类-47-02】20240308古诗《春夜喜雨》里的幼儿姓名（同音同字、同音不同字）-CSDN博客（文章浏览阅读1k次，点赞13次，收藏11次）。https://blog.csdn.net/reasonsummer/article/details/136559314

一、08名字中拼音同音字

'''
幼儿名字中拼音同音字（拼音）
AI对话大师，阿夏
20240902
'''


from pandas import DataFrame, Series
import pandas as pd
import numpy as np
import xlrd
from xpinyin import Pinyin

 # 读取列
# Open the class roster workbook and list its worksheets.
# NOTE(review): xlrd 2.0+ only reads legacy .xls files; opening this .xlsx
# presumably needs xlrd < 2.0 -- confirm the installed version.
worksheet = xlrd.open_workbook(r'C:\Users\jg2yXRZ\OneDrive\桌面\中2班分析\学生名单.xlsx')
sheet_names = worksheet.sheet_names()
print(sheet_names)

# Every sheet is visited, but `cols` is rebound on each pass, so the code
# after this loop only sees the column read from the LAST sheet.
for sheet_name in sheet_names:
    sheet = worksheet.sheet_by_name(sheet_name)
    # Second column (index 1); values keep the type stored in the cell
    # (text stays str, numbers come back as float).
    cols = sheet.col_values(1)

    print(cols[1:])          # names without the header row, e.g. ['张三', '李四', ...]
    print(type(cols[1:]))    # <class 'list'>

# Convert every character of every name (surname included) to pinyin with
# tone marks, e.g. get_pinyin('叶伏天', tone_marks='marks') gives 'yè-fú-tiān'.
p = Pinyin()  # one converter serves the whole run; no need to rebuild it per character

listall = []
for full_name in cols[1:]:      # cols[0] is the header row
    for char in full_name:
        syllable = p.get_pinyin(char, tone_marks='marks')
        print(syllable)
        listall.append(syllable)
print(listall)
# listall is a flat list with one pinyin syllable per character.

# Tally how many times each pinyin syllable occurs.
name_dict = {}
for name in listall:
    name_dict[name] = name_dict.get(name, 0) + 1

# Re-bind name_dict to a list of (syllable, count) tuples, highest count first.
# sorted() is stable, so syllables with equal counts keep first-seen order.
name_dict = sorted(name_dict.items(), key=lambda item: item[1], reverse=True)
print(name_dict )

# One "syllable count" pair per line.
for aa, bb in name_dict:
    print(aa, bb)

chén 7
yì 6
yī 5
zǐ 4
wáng 4
zhāng 3
yú 2
yuè 2
bó 2
yǐn 2
yáng 2
wǎn 2
xīn 2
wú 2
xī 2
lǐ 2
táng 2

二、名字中最多的字

'''
幼儿名字同字（汉字）
AI对话大师，阿夏
20240902
'''
from pandas import DataFrame, Series
import pandas as pd
import numpy as np
import xlrd


 # 读取列
# Open the class roster workbook and list its worksheets.
# NOTE(review): xlrd 2.0+ only reads legacy .xls files; opening this .xlsx
# presumably needs xlrd < 2.0 -- confirm the installed version.
worksheet = xlrd.open_workbook(r'C:\Users\jg2yXRZ\OneDrive\桌面\中2班分析\学生名单.xlsx')
sheet_names = worksheet.sheet_names()
print(sheet_names)

# Every sheet is visited, but `cols` is rebound on each pass, so the code
# after this loop only sees the column read from the LAST sheet.
for sheet_name in sheet_names:
    sheet = worksheet.sheet_by_name(sheet_name)
    # Second column (index 1); values keep the type stored in the cell
    # (text stays str, numbers come back as float).
    cols = sheet.col_values(1)

    print(cols[1:])         # names without the header row (was printed twice by mistake)
    print(type(cols[1]))    # type of the first name cell

# name_list = ["张三", "李四", "周瑜", "张三", "张三", "李四", "王五", "张飞", "张飞", "周瑜"]

# Collect the given-name characters of every child: everything after the
# first character, which is treated as the surname (no two-character
# surnames in the roster at the moment).
b = []
for full_name in cols[1:]:      # cols[0] is the header row
    given_chars = full_name[1:]
    for ch in given_chars:
        print(ch)
    b.extend(given_chars)       # a str extends the list one character at a time
print(b)

# Tally how many times each given-name character occurs.
name_dict = {}
for name in b:
    name_dict[name] = name_dict.get(name, 0) + 1

# Re-bind name_dict to a list of (character, count) tuples, highest count first.
# sorted() is stable, so characters with equal counts keep first-seen order.
name_dict = sorted(name_dict.items(), key=lambda item: item[1], reverse=True)
print(name_dict )

# One "character count" pair per line.
for aa, bb in name_dict:
    print(aa, bb)

一 4
奕 2
博 2
子 2
梓 2
宸 2
晨 2
亦 2

其余的字都只出现过一次

三、姓氏的重复字


'''
目标：中2班幼儿姓氏与名字的出现频率（汉字版）
作者：阿夏
时间：2024年9月1日'''

from pandas import DataFrame, Series
import pandas as pd
import numpy as np
import xlrd


 # 读取列
# Open the class roster workbook and list its worksheets.
# NOTE(review): xlrd 2.0+ only reads legacy .xls files; opening this .xlsx
# presumably needs xlrd < 2.0 -- confirm the installed version.
worksheet = xlrd.open_workbook(r'C:\Users\jg2yXRZ\OneDrive\桌面\中2班分析\学生名单.xlsx')
sheet_names = worksheet.sheet_names()
print(sheet_names)

# Every sheet is visited, but `cols` is rebound on each pass, so the code
# after this loop only sees the column read from the LAST sheet.
for sheet_name in sheet_names:
    sheet = worksheet.sheet_by_name(sheet_name)
    # Second column (index 1); values keep the type stored in the cell
    # (text stays str, numbers come back as float).
    cols = sheet.col_values(1)

    print(cols)             # whole column, header included
    print(cols[1:])         # names only, header row dropped
    print(type(cols[1]))    # type of the first name cell


print('--第3类：名字重复（不考虑声调）-------')
# Collect the given-name characters of every child: everything after the
# first character, which is treated as the surname.
b1 = []
for i1 in cols[1:]:      # cols[0] is the header row
    for i2 in i1[1:]:
        b1.append(i2)

# Tally how many times each given-name character occurs.
name_dict1 = {}
for name1 in b1:
    name_dict1[name1] = name_dict1.get(name1, 0) + 1

# Re-bind name_dict1 to a list of (character, count) tuples, highest count
# first; sorted() is stable, so ties keep first-seen order.
name_dict1 = sorted(name_dict1.items(), key=lambda item: item[1], reverse=True)
print(name_dict1)

# One "character count" pair per line.
for aa1, bb1 in name_dict1:
    print(aa1, bb1)

print('--第1类：名字长度3个名字2个名字-------')
# Split the roster by name length: three-character vs. two-character names.
# Names of any other length end up in neither list.
roster = cols[1:]      # cols[0] is the header row
three = [child for child in roster if len(child) == 3]
two = [child for child in roster if len(child) == 2]

print(three)
print('3个名字的孩子',len(three),'人\n')
print(two)
print('2个名字的孩子',len(two),'人\n')


print('--第2类：姓氏重复（不考虑声调）-------')
# Collect each child's surname, taken as the first character of the name
# (the roster currently has no two-character surnames).
b = []
for i in cols[1:]:      # cols[0] is the header row
    if not i:           # blank cell: nothing to index, skip it instead of raising IndexError
        continue
    b.append(i[0])
print(b)
# e.g. ["张", "李", "周", "张", "张", "李"]

# Tally how many times each surname occurs.
name_dict = {}
for name in b:
    name_dict[name] = name_dict.get(name, 0) + 1
print(name_dict)

# Re-bind name_dict to a list of (surname, count) tuples, highest count first.
# sorted() is stable, so surnames with equal counts keep first-seen order.
name_dict = sorted(name_dict.items(), key=lambda item: item[1], reverse=True)
print(name_dict )

# One "surname count" pair per line.
for aa, bb in name_dict:
    print(aa, bb)

print('--第3类：名字重复（不考虑声调）-------')
# Re-collect the given-name characters of every child: everything after the
# first character, which is treated as the surname. cols[0] is the header row.
b1 = [ch for child_name in cols[1:] for ch in child_name[1:]]

# name_dict1 = {}
# for name1 in b1:
#     # 取出字典中的所有keys值     
#     key_list1 = name_dict1.keys()
#     # key_list = name_dict[0]
#     # print(key_list)
#     if name1 in key_list1:
#         name_dict1[name1] += 1
#     else:
#         name_dict1[name1] = 1

# # # 根据字典中的value值进行倒序排序

# name_dict1 = sorted(name_dict1.items(), key=lambda item:item[1], reverse=True)
# print(name_dict )

# d1=[]
# for c1 in range(0,len(name_dict1)):
#     aa1=name_dict1[c1][0]
#     bb1=name_dict1[c1][1]
#     print(aa1,bb1)

王 4
张 3
余 2
尹 2
吴 2
陈 2
李 2
唐 2

四、姓氏和名字中的拼音

 

'''
目标：中2班幼儿姓氏与名字的出现频率（拼音声调版）
作者：阿夏
时间：2024年9月2日'''

from pandas import DataFrame, Series
import pandas as pd
import numpy as np
import xlrd
from xpinyin import Pinyin

print('------ 读取姓氏或名字（不考虑声调）----------')
# Open the class roster workbook.
# NOTE(review): xlrd 2.0+ only reads legacy .xls files; opening this .xlsx
# presumably needs xlrd < 2.0 -- confirm the installed version.
worksheet = xlrd.open_workbook(r'C:\Users\jg2yXRZ\OneDrive\桌面\中2班分析\学生名单.xlsx')
sheet_names = worksheet.sheet_names()

# Every sheet is visited, but `cols` is rebound on each pass, so the code
# after this loop only sees the column read from the LAST sheet.
for sheet_name in sheet_names:
    sheet = worksheet.sheet_by_name(sheet_name)
    # Second column (index 1): the header cell followed by the names as
    # Chinese text, e.g. ['张三', '李四', ...] once the header is dropped.
    cols = sheet.col_values(1)


# Convert the given-name characters (everything after the first character,
# which is treated as the surname) to pinyin WITHOUT tone marks.
p = Pinyin()  # one converter serves the whole run; no need to rebuild it per character

listall = []
for full_name in cols[1:]:      # cols[0] is the header row
    for char in full_name[1:]:
        listall.append(p.get_pinyin(char))
# listall is a flat list of toneless syllables, e.g. ['san', 'si', ...]

# Tally how many times each toneless pinyin syllable occurs.
name_dict = {}
for name in listall:
    name_dict[name] = name_dict.get(name, 0) + 1

# Re-bind name_dict to a list of (syllable, count) tuples, highest count first.
# sorted() is stable, so syllables with equal counts keep first-seen order.
name_dict = sorted(name_dict.items(), key=lambda item: item[1], reverse=True)
print(name_dict )

# One "syllable count" pair per line (toneless pinyin, then its count).
for aa, bb in name_dict:
    print(aa, bb)


   
print('------ 读取姓氏或名字（考虑声调）----------')

# This section reuses the imports at the top of the script (pandas, numpy,
# xlrd, xpinyin) and simply re-reads the same workbook.
# NOTE(review): xlrd 2.0+ only reads legacy .xls files; opening this .xlsx
# presumably needs xlrd < 2.0 -- confirm the installed version.
worksheet = xlrd.open_workbook(r'C:\Users\jg2yXRZ\OneDrive\桌面\中2班分析\学生名单.xlsx')
sheet_names = worksheet.sheet_names()
print(sheet_names)

# Every sheet is visited, but `cols` is rebound on each pass, so the code
# after this loop only sees the column read from the LAST sheet.
for sheet_name in sheet_names:
    sheet = worksheet.sheet_by_name(sheet_name)
    # Second column (index 1): the header cell followed by the names as
    # Chinese text, e.g. ['张三', '李四', ...] once the header is dropped.
    cols = sheet.col_values(1)
 
# Convert the given-name characters to pinyin WITH tone marks, e.g.
# get_pinyin('叶伏天', tone_marks='marks') gives 'yè-fú-tiān'.
# Slice choice: full_name[1:] keeps the given name only; use full_name[0]
# instead to analyse surnames only.
p = Pinyin()  # one converter serves the whole run; no need to rebuild it per character

listall = []
for full_name in cols[1:]:      # cols[0] is the header row
    for char in full_name[1:]:
        syllable = p.get_pinyin(char, tone_marks='marks')
        print(syllable)
        listall.append(syllable)
print(listall)
# listall is a flat list with one tone-marked syllable per character.
 
# Tally how many times each tone-marked pinyin syllable occurs.
name_dict = {}
for name in listall:
    name_dict[name] = name_dict.get(name, 0) + 1

# Re-bind name_dict to a list of (syllable, count) tuples, highest count first.
# sorted() is stable, so syllables with equal counts keep first-seen order.
name_dict = sorted(name_dict.items(), key=lambda item: item[1], reverse=True)
print(name_dict )

# One "syllable count" pair per line.
for aa, bb in name_dict:
    print(aa, bb)

# # ————————————————
# # 版权声明：本文为CSDN博主「阿夏reasonsummer」的原创文章，遵循CC 4.0 BY-SA版权协议，转载请附上原文出处链接及本声明。
# # 原文链接：https://blog.csdn.net/reasonsummer/article/details/129627144