# 根据学生样本数据计算性别概率
# 要求输出:
# - (1)穿裙子学生属于男生的概率、属于女生的概率;
# - (2)短头发学生属于男生的概率、属于女生的概率;
# - (3)穿裙子、短头发属于男生的概率、属于女生的概率;
# - (4)穿长裤、长头发属于男生的概率、属于女生的概率.
import pandas as pd
# Load the student sample table from its CSV file into a DataFrame.
data = pd.read_csv(filepath_or_buffer='data/data54571/StudentSample.csv')
# Bare expression: builds a DataFrame from the loaded table and discards the
# result. NOTE(review): presumably left over from a notebook cell, where the
# last expression is displayed -- confirm before removing.
pd.DataFrame(data)
# Estimate P(Clouthes = value | Sex = sex) from row counts and print it as a
# table whose columns are the sexes and whose rows are the clothing values.
y = data['Sex']
# Relative frequency of each sex; only its keys (the sex labels) are used here.
pro_y = y.value_counts(normalize=True).to_dict()
x1 = data['Clouthes']
clothes_values = x1.unique()
final = {}
for c_k in pro_y:
    # The row mask and row count for this sex do not depend on the clothing
    # value, so they are computed once per sex instead of once per pair.
    is_sex = data['Sex'] == c_k
    num = int(is_sex.sum())
    # Share of this sex's rows that carry each clothing value.
    final[c_k] = {
        a_j: int(((data['Clouthes'] == a_j) & is_sex).sum()) / num
        for a_j in clothes_values
    }
result_df = pd.DataFrame(final)
print(result_df)
# Estimate P(Hair = value | Sex = sex) from row counts and print it as a
# table whose columns are the sexes and whose rows are the hair values.
y = data['Sex']
# Relative frequency of each sex; only its keys (the sex labels) are used here.
pro_y2 = y.value_counts(normalize=True).to_dict()
x2 = data['Hair']
hair_values = x2.unique()
final2 = {}
for c_k in pro_y2:
    # The row mask and row count for this sex do not depend on the hair
    # value, so they are computed once per sex instead of once per pair.
    is_sex = data['Sex'] == c_k
    num3 = int(is_sex.sum())
    # Share of this sex's rows that carry each hair value.
    final2[c_k] = {
        a_j: int(((data['Hair'] == a_j) & is_sex).sum()) / num3
        for a_j in hair_values
    }
result_df2 = pd.DataFrame(final2)
print(result_df2)
# For every sex c and every (clothing a, hair b) pair, store the product
#     P(Clouthes = a | Sex = c) * P(Hair = b | Sex = c)
# estimated from row counts, then print the values grouped by sex.
# NOTE: this product equals the joint conditional probability named in the
# printed label only if clothing and hair are independent given the sex.
joint_probability = {}
for c_k in pro_y:
    # Mask and count of rows for this sex; shared by every pair below.
    is_sex = data['Sex'] == c_k
    num_y = int(is_sex.sum())
    # Hair frequencies depend only on (sex, hair value): compute them once
    # per sex instead of once per clothing value.
    hair_given_sex = {
        b_j: int(((data['Hair'] == b_j) & is_sex).sum()) / num_y
        for b_j in x2.unique()
    }
    joint_prob = {}
    for a_j in x1.unique():
        # Clothing frequency depends only on (sex, clothing value).
        clothes_given_sex = int(((data['Clouthes'] == a_j) & is_sex).sum()) / num_y
        for b_j, hair_prob in hair_given_sex.items():
            joint_prob[(a_j, b_j)] = clothes_given_sex * hair_prob
    joint_probability[c_k] = joint_prob

# Report: one header line per sex, one line per (clothing, hair) pair, then a
# blank separator after each sex.
for c_k, joint_prob in joint_probability.items():
    print(f"性别为'{c_k}'时候的条件概率:")
    for (a_j, b_j), prob in joint_prob.items():
        print(f"P(衣着='{a_j}', 头发='{b_j}' | 性别='{c_k}') = {prob:f}")
    print("\n")