《Python自然语言处理（第二版）-Steven Bird等》学习笔记：第10章分析句子的意思

本文链接：https://blog.csdn.net/weixin_43935926/article/details/86531393

import nltk

如何能表示自然语言的意思，使计算机能够处理这些表示？
怎样才能将意思表示与无限的句子集合关联？
怎样才能使用连接意思表示与句子的程序来存储知识？

10.1 自然语言理解

查询数据库

nltk.data.show_cfg('grammars/book_grammars/sql0.fcfg')

% start S
S[SEM=(?np + WHERE + ?vp)] -> NP[SEM=?np] VP[SEM=?vp]
VP[SEM=(?v + ?pp)] -> IV[SEM=?v] PP[SEM=?pp]
VP[SEM=(?v + ?ap)] -> IV[SEM=?v] AP[SEM=?ap]
NP[SEM=(?det + ?n)] -> Det[SEM=?det] N[SEM=?n]
PP[SEM=(?p + ?np)] -> P[SEM=?p] NP[SEM=?np]
AP[SEM=?pp] -> A[SEM=?a] PP[SEM=?pp]
NP[SEM='Country="greece"'] -> 'Greece'
NP[SEM='Country="china"'] -> 'China'
Det[SEM='SELECT'] -> 'Which' | 'What'
N[SEM='City FROM city_table'] -> 'cities'
IV[SEM=''] -> 'are'
A[SEM=''] -> 'located'
P[SEM=''] -> 'in'

from nltk import load_parser

# Parse an English question with the SQL feature grammar and read the
# translation off the SEM feature of the root node of the first parse.
cp = load_parser('grammars/book_grammars/sql0.fcfg')
query = 'What cities are located in China'
trees = list(cp.parse(query.split()))
answer = trees[0].label()['SEM']

# The grammar gives 'are', 'located' and 'in' an empty SEM value, so the
# falsy fragments are dropped before the rest is joined into one SQL string.
answer = list(filter(None, answer))
q = ' '.join(answer)
print(q)

SELECT City FROM city_table WHERE Country="china"

from nltk.sem import chat80

# Run the generated SQL against the bundled city database and print the
# first column of every returned row on one space-separated line.
rows = chat80.sql_query('corpora/city_database/city.db', q)
for r in rows:
    print(r[0], end=" ")

canton chungking dairen harbin kowloon mukden peking shanghai sian tientsin

自然语言、语义和逻辑

10.2 命题逻辑

nltk.boolean_ops()

negation       	-
conjunction    	&
disjunction    	|
implication    	->
equivalence    	<->

read_expr = nltk.sem.Expression.fromstring

read_expr('-(P & Q)')

<NegatedExpression -(P & Q)>

read_expr('P & Q')

<AndExpression (P & Q)>

read_expr('P | (R -> Q)')

<OrExpression (P | (R -> Q))>

read_expr('P <-> -- P')

<IffExpression (P <-> --P)>

# Alias bound to the same callable as read_expr; nothing in these notes
# uses it afterwards.
lp = nltk.sem.Expression.fromstring

# Premise (SnF), goal (-FnS) and the rule linking them (SnF -> -FnS).
SnF, NotFnS, R = (
    read_expr(text) for text in ('SnF', '-FnS', 'SnF -> -FnS')
)

prover = nltk.Prover9()

# Proof call left disabled, as in the original notes:
# prover.prove(NotFnS, [SnF, R])

val = nltk.Valuation([('P', True), ('Q', True), ('R', False)])

val['P']

True

# The model is built over an empty domain of individuals; only the
# valuation `val` defined earlier supplies truth values.
dom = set()
g = nltk.Assignment(dom)
m = nltk.Model(dom, val)

print(m.evaluate('(P & Q)', g))

True

print(m.evaluate('-(P & Q)', g))

False

print(m.evaluate('(P & R)', g))

False

print(m.evaluate('(P | R)', g))

True

10.3 一阶逻辑

read_expr = nltk.sem.Expression.fromstring

expr = read_expr('walk(angus)', type_check=True)

expr.argument

<ConstantExpression angus>

expr.argument.type

expr.function

<ConstantExpression walk>

expr.function.type

<e,?>

sig = {'walk': '<e, t>'}

expr = read_expr('walk(angus)', signature=sig)

expr.function.type

read_expr = nltk.sem.Expression.fromstring

read_expr('dog(cyril)').free()

set()

read_expr('dog(x)').free()

{Variable('x')}

read_expr('own(angus, cyril)').free()

set()

read_expr('exists x.dog(x)').free()

set()

read_expr('((some x. walk(x)) -> sing(x))').free()

{Variable('x')}

read_expr('exists x.own(y, x)').free()

{Variable('y')}

一阶定理证明

# Premise, asymmetry rule, and the two candidate goals for the
# "north_of" argument.
SnF = read_expr('north_of(s, f)')
R = read_expr('all x. all y. (north_of(x, y) -> -north_of(y, x))')
NotFnS = read_expr('-north_of(f, s)')
FnS = read_expr('north_of(f, s)')

prover = nltk.Prover9()

# Both proof attempts are left disabled, as in the original notes:
# prover.prove(NotFnS, [SnF, R])
# prover.prove(FnS, [SnF, R])

一阶逻辑语言总结

真值模型

dom = {'b', 'o', 'c'}

v = """
    bertie => b
    olive => o
    cyril => c
    boy => {b}
    girl => {o}
    dog => {c}
    walk => {o, c}
    see => {(b, o), (c, b), (o, c)}
    """

val = nltk.Valuation.fromstring(v)

print(val)

{'bertie': 'b',
 'boy': {('b',)},
 'cyril': 'c',
 'dog': {('c',)},
 'girl': {('o',)},
 'olive': 'o',
 'see': {('o', 'c'), ('b', 'o'), ('c', 'b')},
 'walk': {('o',), ('c',)}}

('o', 'c') in val['see']

True

('b',) in val['boy']

True

个体变量和赋值

g = nltk.Assignment(dom, [('x', 'o'), ('y', 'c')])

g

{'x': 'o', 'y': 'c'}

print(g)

g[c/y][o/x]

m = nltk.Model(dom, val)

m.evaluate('see(olive, y)', g)

True

g['y']

'c'

m.evaluate('see(y, x)', g)

False

g.purge()

{}

m.evaluate('see(olive, y)', g)

'Undefined'

m.evaluate('see(bertie, olive) & boy(bertie) & -walk(bertie)', g)

True

量化

m.evaluate('exists x.(girl(x) & walk(x))', g)

True

m.evaluate('girl(x) & walk(x)', g.add('x', 'o'))

True

fmla1 = read_expr('girl(x) | boy(x)')

m.satisfiers(fmla1, 'x', g)

{'b', 'o'}

fmla2 = read_expr('girl(x) -> walk(x)')

m.satisfiers(fmla2, 'x', g)

{'b', 'c', 'o'}

fmla3 = read_expr('walk(x) -> girl(x)')

m.satisfiers(fmla3, 'x', g)

{'b', 'o'}

m.evaluate('all x.(girl(x) -> walk(x))', g)

True

量词范围歧义

v2 = """
    bruce => b
    elspeth => e
    julia => j
    matthew => m
    person => {b, e, j, m}
    admire => {(j, b), (b, b), (m, e), (e, m)}
    """

val2 = nltk.Valuation.fromstring(v2)

dom2 = val2.domain

m2 = nltk.Model(dom2, val2)

g2 = nltk.Assignment(dom2)

fmla4 = read_expr('(person(x) -> exists y.(person(y) & admire(x, y)))')

m2.satisfiers(fmla4, 'x', g2)

{'b', 'e', 'j', 'm'}

fmla5 = read_expr('(person(y) & all x.(person(x) -> admire(x, y)))')

m2.satisfiers(fmla5, 'y', g2)

set()

fmla6 = read_expr('(person(y) & all x.((x = bruce | x = julia) -> admire(x, y)))')

m2.satisfiers(fmla6, 'y', g2)

{'b'}

模型的建立

# Formulas handed to the Mace model builder in the (disabled) calls below.
a3 = read_expr('exists x.(man(x) & walks(x))')
c1, c2 = (
    read_expr(text) for text in ('mortal(socrates)', '-mortal(socrates)')
)

mb = nltk.Mace(5)

# Model-building attempts, left disabled as in the original notes:
# print(mb.build_model(None, [a3, c1]))
# print(mb.build_model(None, [a3, c2]))
# print(mb.build_model(None, [c1, c2]))

# Assumptions shared by both MaceCommand set-ups below.
a4 = read_expr('exists y. (woman(y) & all x. (man(x) -> love(x,y)))')
a5 = read_expr('man(adam)')
a6 = read_expr('woman(eve)')

# NOTE: from here on `g` names the goal formula, replacing the
# nltk.Assignment that `g` held earlier in these notes.
g = read_expr('love(adam,eve)')
mc = nltk.MaceCommand(g, assumptions=[a4, a5, a6])
# Left disabled, as in the original notes:
# mc.build_model()
# print(mc.valuation)

# Second set-up: the same goal with one extra assumption, a7.
a7 = read_expr('all x. (man(x) -> -woman(x))')
g = read_expr('love(adam,eve)')
mc = nltk.MaceCommand(g, assumptions=[a4, a5, a6, a7])
# Left disabled, as in the original notes:
# mc.build_model()
# print(mc.valuation)

10.4 英语句子的语义

λ演算

read_expr = nltk.sem.Expression.fromstring

expr = read_expr(r'\x.(walk(x) & chew_gum(x))')

expr

<LambdaExpression \x.(walk(x) & chew_gum(x))>

expr.free()

set()

print(read_expr(r'\x.(walk(x) & chew_gum(y))'))

\x.(walk(x) & chew_gum(y))

expr = read_expr(r'\x.(walk(x) & chew_gum(x))(gerald)')

print(expr)

\x.(walk(x) & chew_gum(x))(gerald)

print(expr.simplify())

(walk(gerald) & chew_gum(gerald))

print(read_expr(r'\x.\y.(dog(x) & own(y, x))(cyril)').simplify())

\y.(dog(cyril) & own(y,cyril))

print(read_expr(r'\x y.(dog(x) & own(y, x))(cyril, angus)').simplify())

(dog(cyril) & own(angus,cyril))

expr1 = read_expr('exists x.P(x)')

print(expr1)

exists x.P(x)

expr2 = expr1.alpha_convert(nltk.sem.Variable('z'))

print(expr2)

exists z.P(z)

expr1 == expr2

True

# Raw string: '\P' and '\y' are lambda operators for the logic parser, not
# Python escape sequences. Without the r-prefix Python warns about invalid
# escapes; the parsed text itself is unchanged.
expr3 = read_expr(r'\P.(exists x.P(x))(\y.see(y, x))')

print(expr3)

(\P.exists x.P(x))(\y.see(y,x))

print(expr3.simplify())

exists z1.see(z1,x)

量化的NP

及物动词

read_expr = nltk.sem.Expression.fromstring

tvp = read_expr(r'\X x.X(\y.chase(x,y))')

np = read_expr(r'(\P.exists x.(dog(x) & P(x)))')

vp = nltk.sem.ApplicationExpression(tvp, np)

print(vp)

(\X x.X(\y.chase(x,y)))(\P.exists x.(dog(x) & P(x)))

print(vp.simplify())

\x.exists z2.(dog(z2) & chase(x,z2))

from nltk import load_parser

# Parse one sentence with the simple-sem grammar and print the SEM feature
# of the root node of every parse tree.
parser = load_parser('grammars/book_grammars/simple-sem.fcfg', trace=0)
sentence = 'Angus gives a bone to every dog'
tokens = sentence.split()
for tree in parser.parse(tokens):
    print(tree.label()['SEM'])

all z4.(dog(z4) -> exists z3.(bone(z3) & give(angus,z3,z4)))

sents = ['Irene walks', 'Cyril bites an ankle']
grammar_file = 'grammars/book_grammars/simple-sem.fcfg'

# Each item yielded by interpret_sents is iterated as (synrep, semrep)
# pairs; only the first element of each pair is printed.
for results in nltk.interpret_sents(sents, grammar_file):
    for synrep, semrep in results:
        print(synrep)

(S[SEM=<walk(irene)>]
  (NP[-LOC, NUM='sg', SEM=<\P.P(irene)>]
    (PropN[-LOC, NUM='sg', SEM=<\P.P(irene)>] Irene))
  (VP[NUM='sg', SEM=<\x.walk(x)>]
    (IV[NUM='sg', SEM=<\x.walk(x)>, TNS='pres'] walks)))
(S[SEM=<exists z5.(ankle(z5) & bite(cyril,z5))>]
  (NP[-LOC, NUM='sg', SEM=<\P.P(cyril)>]
    (PropN[-LOC, NUM='sg', SEM=<\P.P(cyril)>] Cyril))
  (VP[NUM='sg', SEM=<\x.exists z5.(ankle(z5) & bite(x,z5))>]
    (TV[NUM='sg', SEM=<\X x.X(\y.bite(x,y))>, TNS='pres'] bites)
    (NP[NUM='sg', SEM=<\Q.exists x.(ankle(x) & Q(x))>]
      (Det[NUM='sg', SEM=<\P Q.exists x.(P(x) & Q(x))>] an)
      (Nom[NUM='sg', SEM=<\x.ankle(x)>]
        (N[NUM='sg', SEM=<\x.ankle(x)>] ankle)))))

v = """
    bertie => b
    olive => o
    cyril => c
    boy => {b}
    girl => {o}
    dog => {c}
    walk => {o, c}
    see => {(b, o), (c, b), (o, c)}
    """

val = nltk.Valuation.fromstring(v)

g = nltk.Assignment(val.domain)

m = nltk.Model(val.domain, val)

sent = 'Cyril sees every boy'

grammar_file = 'grammars/book_grammars/simple-sem.fcfg'

results = nltk.evaluate_sents([sent], grammar_file, m, g)[0]

for (syntree, semrep, value) in results:
    print(semrep)
    print(value)

all z6.(boy(z6) -> see(cyril,z6))
True

再述量词歧义

from nltk.sem import cooper_storage as cs

sentence = 'every girl chases a dog'
trees = cs.parse_with_bindops(
    sentence,
    grammar='grammars/book_grammars/storage.fcfg',
)

# The SEM value of the first parse's root node is wrapped in a CooperStore,
# whose `core` attribute is then printed.
semrep = trees[0].label()['SEM']
cs_semrep = cs.CooperStore(semrep)
print(cs_semrep.core)

chase(z2,z3)

for bo in cs_semrep.store:
    print(bo)

bo(\P.all x.(girl(x) -> P(x)),z2)
bo(\P.exists x.(dog(x) & P(x)),z3)

cs_semrep.s_retrieve(trace=True)

Permutation 1
   (\P.all x.(girl(x) -> P(x)))(\z2.chase(z2,z3))
   (\P.exists x.(dog(x) & P(x)))(\z3.all x.(girl(x) -> chase(x,z3)))
Permutation 2
   (\P.exists x.(dog(x) & P(x)))(\z3.chase(z2,z3))
   (\P.all x.(girl(x) -> P(x)))(\z2.exists x.(dog(x) & chase(z2,x)))

for reading in cs_semrep.readings:
    print(reading)

exists x.(dog(x) & all z9.(girl(z9) -> chase(z9,x)))
all x.(girl(x) -> exists z10.(dog(z10) & chase(x,z10)))

10.5 段落语义层

段落表示理论

read_dexpr = nltk.sem.DrtExpression.fromstring

drs1 = read_dexpr('([x, y], [angus(x), dog(y), own(x, y)])')

print(drs1)

([x,y],[angus(x), dog(y), own(x,y)])

drs1.draw()

print(drs1.fol())

exists x y.(angus(x) & dog(y) & own(x,y))

drs2 = read_dexpr('([x], [walk(x)]) + ([y], [run(y)])')

print(drs2)

(([x],[walk(x)]) + ([y],[run(y)]))

drs3 = read_dexpr('([], [(([x], [dog(x)]) -> ([y],[ankle(y), bite(x, y)]))])')

print(drs3.fol())

all x.(dog(x) -> exists y.(ankle(y) & bite(x,y)))

drs4 = read_dexpr('([x, y], [angus(x), dog(y), own(x, y)])')

drs5 = read_dexpr('([u, z], [PRO(u), irene(z), bite(u, z)])')

drs6 = drs4 + drs5

print(drs6.simplify())

([u,x,y,z],[angus(x), dog(y), own(x,y), PRO(u), irene(z), bite(u,z)])

print(drs6.simplify().resolve_anaphora())

([u,x,y,z],[angus(x), dog(y), own(x,y), (u = [x,y,z]), irene(z), bite(u,z)])

from nltk import load_parser

# Load the DRT grammar, supplying a DrtParser instance as its logic parser.
parser = load_parser(
    'grammars/book_grammars/drt.fcfg',
    logic_parser=nltk.sem.drt.DrtParser(),
)
trees = list(parser.parse('Angus owns a dog'.split()))

# Print the simplified SEM value of the first parse's root node.
print(trees[0].label()['SEM'].simplify())

([x,z12],[Angus(x), dog(z12), own(x,z12)])

段落处理

dt = nltk.DiscourseTester(['A student dances', 'Every student is a person'])

#dt.readings()

#dt.add_sentence('No person dances', consistchk=True)

#dt.retract_sentence('No person dances', verbose=True)

#dt.add_sentence('A person dances', informchk=True)

from nltk.tag import RegexpTagger

# (pattern, tag) pairs for the small vocabulary used in the discourse
# example; every pattern is anchored with ^...$.
tagger = RegexpTagger([
    (r'^(chases|runs)$', 'VB'),
    (r'^(a)$', 'ex_quant'),
    (r'^(every)$', 'univ_quant'),
    (r'^(dog|boy)$', 'NN'),
    (r'^(He)$', 'PRP'),
])

#rc = nltk.DrtGlueReadingCommand(depparser=nltk.MaltParser(tagger=tagger))

#dt = nltk.DiscourseTester(['Every dog chases a boy', 'He runs'], rc)

#dt.readings()

#dt.readings(show_thread_readings=True)

#dt.readings(show_thread_readings=True, filter=True)

10.6 小结

一阶逻辑是一种适合在计算环境中表示自然语言的含义的语言，因为它很灵活，足以表示自然语言含义的很多有用的方面，并且已有可用于一阶逻辑推理的高效定理证明器。（不过，自然语言语义中也有各种各样的现象，被认为需要更强大的逻辑机制。）
在将自然语言句子翻译成一阶逻辑的同时，我们可以通过检查一阶公式的模型来表述这些句子的真值条件。
为了构建成分组合的意思表示，我们为一阶逻辑补充了λ-演算。
λ-演算中的β-约简在语义上与函数传递参数对应。句法上，它包括将被函数表达式中的λ绑定的变量替换为函数应用中表达式提供的参数。
构建模型的一个关键部分在于建立估值，为非逻辑常量分配解释。这些非逻辑常量被解释为n 元谓词或个体常量。
一个开放表达式是一个包含一个或多个自由变量的表达式。开放表达式只在它的自由变量被赋值时才能得到解释。
量词的解释方式是：对于含有自由变量x 的公式φ[x]，先构建个体的集合——当赋值g 把这些个体指派为x 的值时，φ[x]为真；然后量词对这个集合加以约束。
一个封闭的表达式是一个没有自由变量的表达式。也就是，变量都被绑定。一个封闭的表达式的真假对所有变量赋值都相同，不依赖于具体的变量赋值。
如果两个公式只是由绑定操作符（即λ或量词）绑定的变量的标签不同，那么它们是α-等价。重新标记公式中的绑定变量的结果被称为α-转换。
给定有两个嵌套量词Q1 和Q2 的公式，最外层的量词Q1 有较广的范围（或范围超出Q2）。英语句子往往由于它们包含的量词的范围而产生歧义。
在基于特征的文法中英语句子可以通过将SEM 作为特征与语义表达关联。一个复杂的表达式的SEM 值通常包括成分表达式的SEM 值的函数应用。

致谢
《Python自然语言处理》¹²³ ⁴，作者：Steven Bird, Ewan Klein & Edward Loper，是实践性很强的一部入门读物，2009年第一版，2015年第二版，本学习笔记结合上述版本，对部分内容进行了延伸学习、练习，在此分享，期待对大家有所帮助，欢迎加我微信（验证：NLP），一起学习讨论，不足之处，欢迎指正。
（原文此处为一张图片，未能保留。）

参考文献

http://nltk.org/ ↩︎
Steven Bird, Ewan Klein & Edward Loper,Natural Language Processing with Python,2009 ↩︎
（英）伯德，（英）克莱因，（美）洛普，《Python自然语言处理》，2010年，东南大学出版社 ↩︎
Steven Bird, Ewan Klein & Edward Loper,Natural Language Processing with Python,2015 ↩︎