Python实现分组排序
需求1:按照药品名称(ADVICE_CONTENT)分组,排序。
第一种实现方式
import pandas as pd
base_dire = './result/xx.csv'
result_file = './result/drug_group_desc.csv'
df = pd.read_csv(base_dire,engine='python',encoding='utf-8',usecols=['ADVICE_CONTENT'])
#第一种方式使用size函数添加每一组出现的次数,类似sql中的count;
drug_name_group_desc = df.groupby('ADVICE_CONTENT').size().reset_index(name='count').sort_values(by=['count'],ascending=(False))
#第二种方式使用count函数添加每一组出现的次数,类似sql中的count;
# drug_name_group_desc = df.groupby('ADVICE_CONTENT')['ADVICE_CONTENT'].count().reset_index(name='count').sort_values(by=['count'],ascending=(True))
drug_name_group_desc.to_csv(result_file, index=False)
'''
思路:
1.按照"药品名称"分组 2.使用size函数获得组别个数 3.reset_index 重置索引,添加count列 4.按照count列降序
学习:
1.ascending=(True) 升序,ascending=(False) 降序
2.groupby('ADVICE_CONTENT') 按照一个字段分组
3.groupby(by=['ADVICE_CONTENT','count']) 按照两个字段分组
#Python技巧之对DataFrame进行多列排序
https://blog.csdn.net/m0_37637511/article/details/79901071
#pandas之分组groupby()的使用整理与总结
https://blog.csdn.net/FrankieHello/article/details/97272990
'''
第二种实现方式
import pandas as pd
base_dire = './result/xx.csv'
result_file = './result/drug_group_desc.csv'
#定义DataFrame列名
df_colums = ['ADVICE_CONTENT','count']
#定义1个DataFrame
df_result = pd.DataFrame(columns=df_colums)
#engine='python' 读取csv报错,所以加了这个参数;encoding='utf-8' 指定编码;usecols参数通过"列名"获取数据;
df = pd.read_csv(base_dire,engine='python',encoding='utf-8',usecols=['ADVICE_CONTENT'])
df2= df['ADVICE_CONTENT'].value_counts(ascending=False) #默认降序 ascending=True 为升序
# print(type(df2))
#遍历1个Series
for index,value in df2.items():
drug_name = index
count = value
#定义1个DataFrame
df_temp = pd.DataFrame([[drug_name,count]],columns=df_colums)
#使用append函数,往”df_result“添加DataFrame数据
df_result = df_result.append(df_temp, ignore_index=True)
df_result.to_csv(result_file,index=False)
需求2:按照药品名称(ADVICE_CONTENT)分组,根据patient_id排序。
import pandas as pd
base_dire = './result/xx.csv'
result_file = './result/drug_group_desc.csv'
df = pd.read_csv(base_dire,engine='python',encoding='utf-8',usecols=['ADVICE_CONTENT','patient_id'])
df.drop_duplicates(inplace=True) #去重
# df2 = df.groupby(['ADVICE_CONTENT']).size().reset_index(name='count').sort_values(by=['count'],ascending=(False))
df2 = df.groupby(['ADVICE_CONTENT'])['ADVICE_CONTENT'].count().reset_index(name='count').sort_values(by=['count'],ascending=(False)) #降序,ascending=True为升序
#转成1个列表
# print(df2['ADVICE_CONTENT'].tolist())
# print(df2['count'].tolist())
df2.to_csv(result_file, index=False)