# 贝叶斯公式

$$P(A \mid B) = P(A) \cdot \frac{P(B \mid A)}{P(B)}$$

- $P(A \mid B)$：后验概率（已知 B 发生后，A 发生的概率）
- $P(A)$：先验概率
- $\frac{P(B \mid A)}{P(B)}$：调整因子

# 案例

def py2compat(name):
    """Return *name* as text, decoding UTF-8 byte strings when possible.

    This is a best-effort helper: any value that cannot be decoded is
    returned unchanged instead of raising.

    Args:
        name: A byte string, a text string, or any other object.

    Returns:
        The UTF-8 decoded text when ``name`` is a decodable byte string,
        otherwise ``name`` itself.
    """
    try:
        return name.decode('utf-8')
    except (AttributeError, UnicodeError):
        # AttributeError: the object has no ``decode`` method (e.g. it is
        # already text on Python 3).
        # UnicodeError: the bytes are not valid UTF-8, or (on Python 2) a
        # unicode object failed its implicit ASCII encode step.
        # Only these are swallowed, so KeyboardInterrupt/SystemExit and
        # unrelated bugs are no longer hidden by a bare ``except``.
        return name

class Gusser(object):
    """Naive Bayes gender guesser driven by a per-character frequency table.

    Attributes (all set in ``__init__``):
        male_total: Sum of the "male" column over every row of the table.
        female_total: Sum of the "female" column over every row of the table.
        total: ``male_total + female_total``.
        freq: Maps a character to ``(female_ratio, male_ratio)``, where each
            ratio is that character's count divided by the matching total.
            Index 0 is female and index 1 is male, matching the ``gender``
            argument of ``prob_for_gender``.
    """

    def __init__(self, freq_path='charfreq.csv'):
        """Load the frequency table and normalise counts into ratios.

        Args:
            freq_path: Path of the UTF-8 encoded CSV file whose rows are
                ``char,male,female``. Defaults to ``'charfreq.csv'``
                (relative to the current working directory), the location
                that used to be hard-coded.
        """
        self.male_total = 0
        self.female_total = 0
        self.freq = {}
        with open(freq_path, 'rb') as f:
            next(f)  # discard the first line (presumably a header row)
            for raw_line in f:
                line = raw_line.decode('utf-8')
                if not line.strip():
                    # Tolerate blank lines (e.g. a trailing newline) instead
                    # of failing the three-way unpack below.
                    continue
                # ``char`` is already text after the decode above, so no
                # further bytes-to-text conversion is needed.
                char, male, female = line.split(',')
                male_count = int(male)      # int() ignores the trailing newline
                female_count = int(female)
                self.male_total += male_count
                self.female_total += female_count
                # Raw counts for now, stored as (female, male).
                self.freq[char] = (female_count, male_count)
        self.total = self.male_total + self.female_total

        # Replace raw counts with ratios:
        # {char: (female_count / female_total, male_count / male_total)}
        self.freq = {
            char: (float(female_count) / self.female_total,
                   float(male_count) / self.male_total)
            for char, (female_count, male_count) in self.freq.items()
        }

    def prob_for_gender(self, firstname, gender=0):
        """Return the unnormalised score of *firstname* for one gender.

        The score is the gender's prior (its share of ``total``) multiplied
        by the per-character ratio of every character in ``firstname``.

        Args:
            firstname: Iterable of characters (the name without its first
                character, as passed by ``guss``).
            gender: 0 for female (default), 1 for male; used as the index
                into the ``freq`` tuples.

        Returns:
            The score as a float. It is 0.0 as soon as one character is
            missing from the frequency table.
        """
        prior_count = self.female_total if gender == 0 else self.male_total
        p = float(prior_count) / self.total

        for char in firstname:
            # Characters absent from the table contribute a zero ratio;
            # previously ``get`` returned None and indexing it raised
            # TypeError.
            p *= self.freq.get(char, (0, 0))[gender]
        return p

    def guss(self, name):
        """Guess the gender for a full name.

        The first character of ``name`` is dropped (presumably a
        single-character surname) and the remainder is scored for both
        genders.

        Args:
            name: Full name as text or UTF-8 bytes.

        Returns:
            ``('male', p)`` or ``('female', p)``, where ``p`` is the winning
            score divided by the sum of both scores, or ``('unknow', 0)``
            when the two scores are equal (including both being zero).

        Raises:
            AssertionError: If any character after the first lies outside
                the range U+4E00..U+9FA0.
        """
        name = py2compat(name)
        firstname = name[1:]
        for char in firstname:
            if not (u'\u4e00' <= char <= u'\u9fa0'):
                # Raised explicitly rather than via ``assert`` so the check
                # is not stripped under ``python -O``; the exception type is
                # kept for existing callers.
                raise AssertionError(u'姓名必须为中文')

        pf = self.prob_for_gender(firstname, 0)
        pm = self.prob_for_gender(firstname, 1)

        if pm > pf:
            return ('male', 1. * pm / (pm + pf))
        elif pm < pf:
            return ('female', 1. * pf / (pm + pf))
        else:
            return ('unknow', 0)

# Build the guesser at module level; constructing it reads 'charfreq.csv'
# from the current working directory.
gusser = Gusser()
# Classify a sample name. The returned (label, probability) tuple is not
# stored or printed here, so it is only visible in an interactive session.
gusser.guss('嬴政')


('male', 0.9341976700488538)


12-02
09-22 1585
01-07 3万+
04-13 3524
05-18 44
07-27 7万+
06-10 184