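# Problem: count the listings for each house layout and export the layouts that occur more than 50 times.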
import numpy as np
import pandas as pd
# Helper used to count how many rows fall under each house layout.
def all_house(house_array):
    """Count how often each distinct value occurs in ``house_array``.

    Args:
        house_array: Array-like of values (for example a single DataFrame
            column) that ``np.unique`` can process.

    Returns:
        dict: Maps each distinct value, in the sorted order produced by
        ``np.unique``, to its number of occurrences as a plain ``int``.
    """
    # A single call yields the distinct values together with their counts,
    # instead of re-scanning the whole array with a boolean mask per value.
    keys, counts = np.unique(house_array, return_counts=True)
    return {key: int(count) for key, count in zip(keys, counts)}
# Hand pandas the path instead of an already-open file object: pandas then
# opens and closes the file itself, so no file handle is leaked.
file_path = "zfsj_group.csv"
file_data = pd.read_csv(file_path, encoding="utf-8")
house_array = file_data["户型"]
hourse1 = all_house(house_array)  # maps each layout to its row count

# Build the two-column table from explicit lists rather than assigning
# dict views to an empty DataFrame.
df = pd.DataFrame(
    {
        "户型": list(hourse1.keys()),
        "数量": list(hourse1.values()),
    }
)
df = df.sort_values(by=["数量"], ascending=False)  # largest counts first
df = df[df["数量"] > 50]  # keep only layouts with more than 50 rows
df_house_info = df.reset_index(drop=True)  # renumber rows 0..n-1 after filtering
# The index is written as the first (unnamed) CSV column, as before.
df_house_info.to_csv("zfsj4_after.csv", encoding="utf-8", header=True)