Exercises - Natural Language Processing with Python (Chapter 9)

import nltk

# 1
grammar = nltk.data.load('9_1_1.fcfg')
print(grammar)
tokens_1 = "I am happy".split()
tokens_2 = "she is happy".split()
tokens_3 = "she am happy".split()

parser = nltk.load_parser('9_1_1.fcfg')
for tree in parser.parse(tokens_1):
    print(tree)
for tree in parser.parse(tokens_2):
    print(tree)
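
# Extra check (not in the original solution): 'she am happy' has a person clash
# (3rd-person subject, 1st-person verb), so this loop should print nothing.
for tree in parser.parse(tokens_3):
    print(tree)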

parser = nltk.load_parser('9_1_2.fcfg')
for tree in parser.parse(tokens_1):
    print(tree)
for tree in parser.parse(tokens_2):
    print(tree)
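
# Extra check (not in the original solution): the same person clash blocks
# 'she am happy' under 9_1_2.fcfg as well, this time via the bundled AGR feature.
for tree in parser.parse(tokens_3):
    print(tree)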

# 9_1_1.fcfg
% start S
S -> NP[PER=?n] VP[PER=?n]
NP[PER=?n] -> PropN[PER=?n]
VP[PER=?n] -> PRED[PER=?n] ADJ

PropN[PER=1] -> 'I'
PropN[PER=3] -> 'she'
PRED[PER=1] -> 'am'
PRED[PER=3] -> 'is'
ADJ -> 'happy'

# 9_1_2.fcfg
% start S
S                    -> NP[AGR=?n] VP[AGR=?n]
NP[AGR=?n]           -> PropN[AGR=?n]
VP[TENSE=?t, AGR=?n] -> Cop[TENSE=?t, AGR=?n] Adj

PropN[AGR=[NUM=sg, PER=3]]            -> 'she'
PropN[AGR=[NUM=sg, PER=1]]            -> 'I'
Cop[TENSE=pres,  AGR=[NUM=sg, PER=3]] -> 'is'
Cop[TENSE=pres,  AGR=[NUM=sg, PER=1]] -> 'am'
Adj                                   -> 'happy'


# 2
tokens_1_1 = "The boy sings".split()
tokens_1_2 = "Boy sings".split()

tokens_2_1 = "The boys sing".split()
tokens_2_2 = "Boys sing".split()

tokens_3_1 = "The water is precious".split()
tokens_3_2 = "Water is precious".split()

parser = nltk.load_parser('9_2.fcfg')
for tree in parser.parse(tokens_1_1):
    print(tree)
for tree in parser.parse(tokens_1_2):
    print(tree)
for tree in parser.parse(tokens_2_1):
    print(tree)
for tree in parser.parse(tokens_2_2):
    print(tree)
for tree in parser.parse(tokens_3_1):
    print(tree)
for tree in parser.parse(tokens_3_2):
    print(tree)
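
# Optional summary (not in the original solution): with 9_2.fcfg only 'Boy sings'
# should fail, since bare NPs are restricted to plural and mass nouns.
for toks in (tokens_1_1, tokens_1_2, tokens_2_1, tokens_2_2, tokens_3_1, tokens_3_2):
    print(' '.join(toks), '->', 'parses' if list(parser.parse(toks)) else 'no parse')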

# 9_2.fcfg
% start S

S[] -> NP[COUNT=?n] VP[COUNT=?n]

# Determined NPs accept any noun; bare NPs are limited to plural and mass
# nouns, so 'Boy sings' is correctly rejected.
NP[COUNT=?n] -> Det[] N[COUNT=?n]
NP[COUNT='pl'] -> N[COUNT='pl']
NP[COUNT='less'] -> N[COUNT='less']
VP[COUNT=?n, TENSE=?t] -> IV[COUNT=?n, TENSE=?t] | COP[] ADJ[]

Det[] -> 'The'
N[COUNT='less'] -> 'water' | 'Water'
N[COUNT='sg'] -> 'boy' | 'Boy'
N[COUNT='pl'] -> 'boys' | 'Boys'
IV[COUNT='sg', TENSE='pres'] -> 'sings'
IV[COUNT='pl', TENSE='pres'] -> 'sing'
COP[] -> 'is'
ADJ[] -> 'precious'


# 3
def subsumes(fs1, fs2):
    # fs1 subsumes fs2 if unifying the two adds nothing beyond fs2.
    return fs1.unify(fs2) == fs2


fs1 = nltk.FeatStruct(NUMBER=74)
fs2 = nltk.FeatStruct(NUMBER=74, STREET='rue Pascal')

print(subsumes(fs1, fs2))
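
# The reverse direction should print False: fs2 is more specific than fs1,
# so it does not subsume it (extra check, not in the original solution).
print(subsumes(fs2, fs1))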


# 4
tokens = "the student from France with good grades walks".split()
parser = nltk.load_parser('9_4.fcfg')
for tree in parser.parse(tokens):
    print(tree)

# 9_4.fcfg
% start S

S -> N[BAR=2] VP[TENSE=?t, NUM=?n]
N[BAR=2] -> Det N[BAR=1]
N[BAR=1] -> N[BAR=1] P[BAR=2]
N[BAR=1] -> N[BAR=0] P[BAR=2]
N[BAR=1] -> N[BAR=0]
P[BAR=2] -> PREP N[BAR=0] | PREP ADJ N[BAR=0]

NP -> N[BAR=2]
VP[TENSE=?t, NUM=?n] -> V[SUBCAT=intrans, TENSE=?t, NUM=?n]
VP[TENSE=?t, NUM=?n] -> V[SUBCAT=trans, TENSE=?t, NUM=?n] NP
VP[TENSE=?t, NUM=?n] -> V[SUBCAT=clause, TENSE=?t, NUM=?n] SBar
SBar -> Comp S

V[SUBCAT=intrans, TENSE=pres, NUM=sg] -> 'disappears' | 'walks'
V[SUBCAT=trans, TENSE=pres, NUM=sg] -> 'sees' | 'likes'
V[SUBCAT=clause, TENSE=pres, NUM=sg] -> 'says' | 'claims'

V[SUBCAT=intrans, TENSE=pres, NUM=pl] -> 'disappear' | 'walk'
V[SUBCAT=trans, TENSE=pres, NUM=pl] -> 'see' | 'like'
V[SUBCAT=clause, TENSE=pres, NUM=pl] -> 'say' | 'claim'

V[SUBCAT=intrans, TENSE=past, NUM=?n] -> 'disappeared' | 'walked'
V[SUBCAT=trans, TENSE=past, NUM=?n] -> 'saw' | 'liked'
V[SUBCAT=clause, TENSE=past, NUM=?n] -> 'said' | 'claimed'

Comp -> 'that'
PREP -> 'from' | 'with'
Det -> 'a' | 'the'
N[BAR=0] -> 'student' | 'France' | 'grades'
ADJ -> 'good'


# 5 (incomplete): trace=2 prints the chart edges, showing where the
# agreement clash between 'den' and 'Katze' blocks the parse
cp = nltk.load_parser('grammars/book_grammars/german.fcfg', trace=2)
tokens = 'ich folge den Katze'.split()
for tree in cp.parse(tokens):
    print(tree)


# 7
cp = nltk.load_parser('grammars/book_grammars/german.fcfg')
tokens = 'ich folge den Katze'.split()
trees = list(cp.parse(tokens))
for tree in trees:
    print(tree)
if not trees:
    print('FAIL')


# 8
fs1 = nltk.FeatStruct("[A = ?x, B= [C = ?x]]")
fs2 = nltk.FeatStruct("[B = [D = d]]")
fs3 = nltk.FeatStruct("[B = [C = d]]")
fs4 = nltk.FeatStruct("[A = (1)[B = b], C->(1)]")
fs5 = nltk.FeatStruct("[A = (1)[D = ?x], C = [E -> (1), F = ?x] ]")
fs6 = nltk.FeatStruct("[A = [D = d]]")
fs7 = nltk.FeatStruct("[A = [D = d], C = [F = [D = d]]]")
fs8 = nltk.FeatStruct("[A = (1)[D = ?x, G = ?x], C = [B = ?x, E -> (1)] ]")
fs9 = nltk.FeatStruct("[A = [B = b], C = [E = [G = e]]]")
fs10 = nltk.FeatStruct("[A = (1)[B = b], C -> (1)]")

print(fs2.unify(fs1))
print(fs1.unify(fs3))
print(fs4.unify(fs5))
print(fs5.unify(fs6))
print(fs5.unify(fs7))
print(fs8.unify(fs9))
print(fs8.unify(fs10))


# 9
fs1 = nltk.FeatStruct("[A = ?x, B= [C = ?x]]")
fs2 = nltk.FeatStruct("[ADDRESS1=?x, ADDRESS2=?x]")
print(fs1)
print(fs2)
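
# Illustration (not in the original solution): unifying fs2 with a concrete
# ADDRESS1 forces ADDRESS2 to share the same value, because both are bound
# to ?x. fs_addr is just an example structure introduced here.
fs_addr = nltk.FeatStruct("[ADDRESS1=[NUMBER=74, STREET='rue Pascal']]")
print(fs2.unify(fs_addr))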


# 12
parser = nltk.load_parser('9_12.fcfg')
token_1 = "The farmer loaded sand into the cart".split()
token_2 = "The farmer loaded the cart with sand".split()
for tree in parser.parse(token_1):
    print(tree)
for tree in parser.parse(token_2):
    print(tree)
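
# Optional check (not in the original solution): both orderings of the
# locative alternation should be accepted by 9_12.fcfg.
for toks in (token_1, token_2):
    print(' '.join(toks), '->', 'parses' if list(parser.parse(toks)) else 'no parse')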

# 9_12.fcfg
% start S

S -> NP VP[TENSE=?t, NUM=?n]

NP -> Det NS

# Two complement frames for the locative ('spray/load') alternation:
# FO1: 'loaded the cart with sand'    FO2: 'loaded sand into the cart'
VP[SUBCAT=FO1, TENSE=past, NUM=?n] -> V Det NO PREP NO
VP[SUBCAT=FO2, TENSE=past, NUM=?n] -> V NO PREP Det NO

Det -> 'The' | 'the'
NS -> 'farmer'
NO -> 'cart' | 'sand'
PREP -> 'with' | 'into'
V -> 'loaded' | 'filled' | 'dumped'

 
