# 1. 白葡萄酒数据分析(white_wine.csv)。
# (1)查看白葡萄酒共分为几种品质(quality)等级。
# (2)按白葡萄酒品质等级将数据集划分为7个子集,统计每个品质等级的样本量。
# (3)计算每个子集中固定酸度(fixed acidity)的均值。
import csv
# Load the whole CSV file into memory: `data` becomes a list of rows, each row
# a list of strings exactly as produced by csv.reader.
csv_path = "C:\\Users\\XXXY-STD\\Downloads\\white_wine.csv"
# newline="" leaves newline handling to the csv module, as its documentation
# recommends; the with-block closes the file even if reading raises.
with open(csv_path, "r", newline="") as f:
    reader = csv.reader(f)
    data = list(reader)
# Preview the first rows. Slicing (instead of indexing 0..4) avoids an
# IndexError when the file holds fewer than five rows.
for row in data[:5]:
    print(row)
# Determine how many distinct quality levels exist.
# The quality label is read from the last column of every row after the first
# (the first row is skipped, presumably because it is the header).
quality_list = [int(record[-1]) for record in data[1:]]
# A set keeps each level once.
quality_count = set(quality_list)
print(
    "白葡萄酒共有%d种等级,分别为:%r"
    % (len(quality_count), quality_count)
)
print("\n")
# Split the rows into one subset per quality level and report the number of
# samples in each subset.
print("将白葡萄酒等级将数据集划分为7个字集,并统计每种等级的数量")
content_dict = {}
for record in data[1:]:
    # setdefault creates the empty bucket the first time a level is seen,
    # then the row is appended to that level's bucket.
    content_dict.setdefault(int(record[-1]), []).append(record)
# Levels are reported in the order they were first encountered.
for level, members in content_dict.items():
    print(level, ":", len(members))
print("\n")
# Compute, for every quality subset, the mean of the first CSV column
# (printed below as "fixed acidity" -- confirm against the CSV header).
print("计算每个数据集中fixed acidity 的均值")
# Each entry is (quality level, mean of column 0 over that subset's rows).
# The builtin sum() is used instead of an accumulator variable named `sum`,
# which would shadow the builtin for the rest of the module.
mean_list = [
    (key, sum(float(row[0]) for row in value) / len(value))
    for key, value in content_dict.items()
]
# Print the means, one "level : mean" line per subset.
for key, mean in mean_list:
    print(key, ":", mean)