# Rental housing / 租房 (4)
import numpy as np
import pandas as pd
# Script: normalise the house-type labels in the rental listings and export
# the most frequent house types with their counts.

# Hand pandas the path itself so it opens AND closes the file; the previous
# bare open() handle was never closed.
file_path = "zfsj_group.csv"
file_data = pd.read_csv(file_path, encoding="utf-8")

# Literal (non-regex) substitution of '房间' with '室' over the whole column.
# The vectorised form leaves missing values untouched instead of raising
# AttributeError on a non-string cell.
file_data["户型"] = file_data["户型"].str.replace("房间", "室", regex=False)

# count() yields the number of non-null cells per remaining column for each
# house type; the "区域" count is relabelled "数量" and used as the sort key.
groupy_area = (
    file_data.groupby(by="户型")
    .count()
    .reset_index()
    .rename(columns={"区域": "数量"})
    .sort_values(by=["数量"], ascending=False)
    .reset_index(drop=True)  # renumber rows 0..n-1 after sorting
)

# Keep the first 12 rows and the first two columns.
# NOTE(review): this presumably selects "户型" and "数量", which only holds if
# "区域" is the first non-key column of the CSV - confirm against the data.
groupy_area.iloc[:12, :2].to_csv("zfsj4_after.csv", encoding="utf-8", header=True)
import numpy as np
import pandas as pd
# Helper used to count how many entries fall under each house type.
def all_house(house_array):
    """Count how often each distinct value occurs in ``house_array``.

    Args:
        house_array: Any array-like accepted by ``np.unique`` (a list, a
            NumPy array, a pandas Series, ...).

    Returns:
        dict: Maps every distinct value to its number of occurrences as a
        plain ``int``. Keys appear in the sorted order produced by
        ``np.unique``. An empty input yields an empty dict.
    """
    # return_counts=True tallies all values in one pass, instead of building
    # a boolean mask over the whole array once per distinct value.
    keys, counts = np.unique(np.asarray(house_array), return_counts=True)
    return {k: int(c) for k, c in zip(keys, counts)}
# Script: count listings per house type with all_house() and export the
# types that occur more than 50 times, most frequent first.

# Hand pandas the path itself so it opens AND closes the file; the previous
# bare open() handle was never closed.
file_path = "zfsj_group.csv"
file_data = pd.read_csv(file_path, encoding="utf-8")

house_array = file_data["户型"]
house_info = all_house(house_array)

# Keep only the house types whose count is strictly greater than 50.
house_type = {name: count for name, count in house_info.items() if count > 50}

df_house_info = pd.DataFrame({
    "户型": list(house_type),
    "数量": list(house_type.values()),
})
df_house_info.sort_values(by=["数量"], ascending=False, inplace=True)
# Renumber the rows 0..n-1 so the exported index follows the sorted order.
df_house_info.index = range(len(df_house_info))
df_house_info.to_csv("zfsj4_after.csv", encoding="utf-8", header=True)
from sklearn.datasets import load_wine #分类
# Load the wine dataset, print its target array, then print one line per
# entry of target_names together with its position.
wine = load_wine()
print(wine["target"])
ls = list(wine["target_names"])
# enumerate() pairs each name with its position, replacing manual indexing.
for i, target_name in enumerate(ls):
    print("{}标签的名称为{}".format(i, target_name))
import numpy as np
import pandas as pd
# Script: sales statistics for the drug-order sheet.
df = pd.read_excel('./drug_order_detai_1.xlsx', sheet_name='drug_order_detail2')

# Sales amount of each row = price * quantity sold.
df['销售额'] = df['价格'] * df['销量']

# The label announces the total sales amount (销售额), so sum that column.
# The earlier version summed the quantity column (销量) before the sales
# amount had even been computed.
print('所有分店总销售额是:', df['销售额'].sum(), sep='')

# Per-branch minimum, maximum and mean sales amount. String aggregation names
# avoid the pandas deprecation of passing NumPy reducers such as np.min to
# agg(); a list of aggregations already returns a DataFrame.
print(df.groupby('分店')['销售额'].agg(['min', 'max', 'mean']))
import pandas as pd
# Build a Series from a label -> value mapping (the keys become the index),
# then show the Series followed by its type.
d = dict(a=9, b=8, c=7, d=6)
a_Series = pd.Series(data=d)
print(a_Series, type(a_Series), sep="\n")
#随机种子数取:0x1010
import numpy as np
import pandas as pd
# Seed NumPy's global random state from a hexadecimal string read on stdin.
seed_value = int(input(), 16)
np.random.seed(seed_value)

# Row labels 1001..1040 and column labels A..D.
row_labels = list(range(1001, 1041))
column_labels = list("ABCD")

# 40 x 4 draws from a normal distribution (mean 75, std 8), cast to C ints.
data = np.random.normal(loc=75, scale=8, size=(40, 4))
df = pd.DataFrame(data.astype(np.intc), index=row_labels, columns=column_labels)
print(df.head(5))