# 02 Accessing text corpora and lexical resources (获得文本语料和词汇资源)
import nltk
# --- Project Gutenberg corpus ----------------------------------------------
# List the file identifiers of the texts bundled with the gutenberg corpus.
gtb = nltk.corpus.gutenberg.fileids()
print(gtb)
'''
['austen-emma.txt', 'austen-persuasion.txt', 'austen-sense.txt', 'bible-kjv.txt', 'blake-poems.txt',
'bryant-stories.txt', 'burgess-busterbrown.txt', 'carroll-alice.txt', 'chesterton-ball.txt',
'chesterton-brown.txt', 'chesterton-thursday.txt', 'edgeworth-parents.txt', 'melville-moby_dick.txt',
'milton-paradise.txt', 'shakespeare-caesar.txt', 'shakespeare-hamlet.txt',
'shakespeare-macbeth.txt', 'whitman-leaves.txt']
'''
# Word tokens of "Emma"; len() counts every word and punctuation token.
emma = nltk.corpus.gutenberg.words('austen-emma.txt')
print(len(emma))
# Wrap the token list in nltk.Text to get concordancing support.
emma = nltk.Text(nltk.corpus.gutenberg.words('austen-emma.txt'))
# concordance() prints the matches itself and returns None, hence the
# trailing "None" in the captured output below.
print(emma.concordance("surprize"))
'''
Displaying 25 of 37 matches:
er father , was sometimes taken by surprize at his being still able to pity `
...
g engaged !" Emma even jumped with surprize ;-- and , horror - struck , exclai
None
'''
# Importing the corpus object directly is the more convenient spelling.
from nltk.corpus import gutenberg
print(gutenberg.fileids())
'''
['austen-emma.txt', 'austen-persuasion.txt', ..., 'whitman-leaves.txt']
'''
# Per-text statistics: average word length, average sentence length, and
# average uses per distinct lower-cased word type (lexical diversity).
for fileid in gutenberg.fileids():
    num_chars = len(gutenberg.raw(fileid))
    num_words = len(gutenberg.words(fileid))
    num_sents = len(gutenberg.sents(fileid))
    num_vocab = len(set([w.lower() for w in gutenberg.words(fileid)]))
    print(int(num_chars/num_words), int(num_words/num_sents), int(num_words/num_vocab), fileid)
'''
4 24 26 austen-emma.txt
4 26 16 austen-persuasion.txt
4 28 22 austen-sense.txt
4 33 79 bible-kjv.txt
4 19 5 blake-poems.txt
4 19 14 bryant-stories.txt
4 17 12 burgess-busterbrown.txt
4 20 12 carroll-alice.txt
4 20 11 chesterton-ball.txt
4 22 11 chesterton-brown.txt
4 18 10 chesterton-thursday.txt
4 20 24 edgeworth-parents.txt
4 25 15 melville-moby_dick.txt
4 52 10 milton-paradise.txt
4 11 8 shakespeare-caesar.txt
4 12 7 shakespeare-hamlet.txt
4 12 6 shakespeare-macbeth.txt
4 36 12 whitman-leaves.txt
'''
# Sentences of Macbeth (each sentence is a list of word tokens); find
# the longest sentence(s) by token count.
macbeth_sentences = gutenberg.sents('shakespeare-macbeth.txt')
print(macbeth_sentences)
print(macbeth_sentences[1037])
longest_len = max([len(s) for s in macbeth_sentences])
l1 = [s for s in macbeth_sentences if (len(s) == longest_len)]
print(l1)
'''
[['Doubtfull', 'it', 'stood', ',', 'As', 'two', 'spent', 'Swimmers', ',', 'that', 'doe', 'cling',
...
'Head', 'vpon', 'our', 'Battlements']]
'''
# --- Web text and chat corpora ----------------------------------------------
from nltk.corpus import webtext
# Show the first 65 raw characters of each web-text document.
for fileid in webtext.fileids():
    print(fileid, webtext.raw(fileid)[:65], '...')
'''
firefox.txt Cookie Manager: "Don't allow sites that set removed cookies to se ...
grail.txt SCENE 1: [wind] [clop clop clop]
KING ARTHUR: Whoa there! [clop ...
overheard.txt White guy: So, do you have any plans for this evening?
Asian girl ...
pirates.txt PIRATES OF THE CARRIBEAN: DEAD MAN'S CHEST, by Ted Elliott & Terr ...
singles.txt 25 SEXY MALE, seeks attrac older single lady, for discreet encoun ...
wine.txt Lovely delicate, fragrant Rhone wine. Polished leather and strawb ...
'''
# Instant-messaging chat posts; each post is a list of word tokens.
from nltk.corpus import nps_chat
chatroom = nps_chat.posts('10-19-20s_706posts.xml')
print(chatroom)
'''[['now', 'im', 'left', 'with', 'this', 'gay', 'name'], [':P'], ...]'''
# --- Brown corpus (categorized by genre) ------------------------------------
from nltk.corpus import brown
print(brown.categories())
'''
['adventure', 'belles_lettres', 'editorial', 'fiction', 'government', 'hobbies', 'humor', 'learned', 'lore',
'mystery', 'news', 'religion', 'reviews', 'romance', 'science_fiction']
'''
# Words can be selected by category or by individual fileid.
print(brown.words(categories='news'))
'''['The', 'Fulton', 'County', 'Grand', 'Jury', 'said', ...]'''
print(brown.words(fileids=['cg22']))
'''['Does', 'our', 'society', 'have', 'a', 'runaway', ',', ...]'''
print(brown.sents(categories=['news', 'editorial', 'reviews']))
'''
[['The', 'Fulton', 'County', 'Grand', 'Jury', 'said', 'Friday', 'an', 'investigation', 'of', "Atlanta's",
'recent', 'primary', 'election', 'produced', '``', 'no', 'evidence', "''", 'that', 'any', 'irregularities',
'took', 'place', '.'], ['The', 'jury', 'further', 'said', 'in', 'term-end', 'presentments', 'that', 'the',
'City', 'Executive', 'Committee', ',', 'which', 'had', 'over-all', 'charge', 'of', 'the', 'election',
',', '``', 'deserves', 'the', 'praise', 'and', 'thanks', 'of', 'the', 'City', 'of', 'Atlanta', "''",
'for', 'the', 'manner', 'in', 'which', 'the', 'election', 'was', 'conducted', '.'], ...]
'''
# Frequency of selected modal verbs in the 'news' category (lower-cased).
from nltk.corpus import brown
news_text = brown.words(categories='news')
fdist = nltk.FreqDist([w.lower() for w in news_text])
modals = ['can', 'could', 'may', 'might', 'must', 'will']
for m in modals:
    print(m + ':', fdist[m])
'''
can: 94
could: 87
may: 93
might: 38
must: 53
will: 389
'''
# Modal counts per genre via a conditional frequency distribution built
# from (genre, word) pairs over the whole corpus.
cfd = nltk.ConditionalFreqDist(
    (genre, word)
    for genre in brown.categories()
    for word in brown.words(categories=genre))
genres = ['news', 'religion', 'hobbies', 'science_fiction', 'romance', 'humor']
modals = ['can', 'could', 'may', 'might', 'must', 'will']
# tabulate() prints the table itself and returns None (trailing "None" below).
res = cfd.tabulate(conditions=genres, samples=modals)
print(res)
'''
can could may might must will
news 93 86 66 38 50 389
religion 82 59 78 12 54 71
hobbies 268 58 131 22 83 264
science_fiction 16 49 4 12 8 16
romance 74 193 11 51 45 43
humor 16 30 8 8 9 13
None
'''
# --- Reuters corpus (multi-label categorized news) --------------------------
from nltk.corpus import reuters
print(reuters.fileids())
'''
['test/14826', 'test/14828', 'test/14829', ..., 'training/9994', 'training/9995']
'''
print(reuters.categories())
'''
['acq', 'alum', 'barley', 'bop', ..., 'wpi', 'yen', 'zinc']
'''
# Lookups work both ways: fileid(s) -> categories, category(ies) -> fileids.
print(reuters.categories('training/9865'))
print(reuters.categories(['training/9865', 'training/9880']))
print(reuters.fileids('barley'))
'''
['test/15618', 'test/15649', 'test/15676', ..., 'training/9865', 'training/9958']
'''
print(reuters.fileids(['barley', 'corn']))
'''
['test/14832', 'test/14858', 'test/15033', ..., 'training/9958', 'training/9989']
'''
# Words can likewise be requested by fileid(s) or by category(ies).
print(reuters.words('training/9865')[:14])
'''
['FRENCH', 'FREE', 'MARKET', 'CEREAL', 'EXPORT', 'BIDS', 'DETAILED', 'French', 'operators', 'have',
'requested', 'licences', 'to', 'export']
'''
print(reuters.words(['training/9865', 'training/9880']))
'''['FRENCH', 'FREE', 'MARKET', 'CEREAL', 'EXPORT', ...]'''
print(reuters.words(categories='barley'))
'''['FRENCH', 'FREE', 'MARKET', 'CEREAL', 'EXPORT', ...]'''
print(reuters.words(categories=['barley', 'corn']))
'''['THAI', 'TRADE', 'DEFICIT', 'WIDENS', 'IN', 'FIRST', ...]'''
# --- US Presidential Inaugural Address corpus -------------------------------
from nltk.corpus import inaugural
print(inaugural.fileids())
'''['1789-Washington.txt', '1793-Washington.txt', ..., '2005-Bush.txt', '2009-Obama.txt']'''
# The first four characters of each fileid are the year of the address.
res = [fileid[:4] for fileid in inaugural.fileids()]
print(res)
'''['1789', '1793', '1797', ..., '2005', '2009']'''
# Count words beginning with 'america'/'citizen' per year; lower-casing and
# prefix matching also catch variants such as 'America' and 'citizens'.
cfd = nltk.ConditionalFreqDist(
    (target, fileid[:4])
    for fileid in inaugural.fileids()
    for w in inaugural.words(fileid)
    for target in ['america', 'citizen']
    if w.lower().startswith(target)
)
# plot() draws the two frequency curves (needs matplotlib to be installed).
cfd.plot()
# --- Corpora in other languages ---------------------------------------------
print(nltk.corpus.cess_esp.words())
'''['El', 'grupo', 'estatal', 'Electricité_de_France', ...]'''
print(nltk.corpus.floresta.words())
'''['Um', 'revivalismo', 'refrescante', 'O', '7_e_Meio', ...]'''
print(nltk.corpus.indian.words('hindi.pos'))
'''['पूर्ण', 'प्रतिबंध', 'हटाओ', ':', 'इराक', 'संयुक्त', ...]'''
# Universal Declaration of Human Rights; fileids encode language + encoding.
print(nltk.corpus.udhr.fileids())
'''['Abkhaz-Cyrillic+Abkh', 'Abkhaz-UTF8', ..., 'Zhuang-Latin1', 'Zulu-Latin1']'''
print(nltk.corpus.udhr.words('Javanese-Latin1')[11:])
'''['Saben', 'umat', 'manungsa', 'lair', 'kanthi', 'hak', ...]'''
# Word-length distribution per language, plotted as cumulative curves.
from nltk.corpus import udhr
languages = ['Chickasaw', 'English', 'German_Deutsch', 'Greenlandic_Inuktikut', 'Hungarian_Magyar', 'Ibibio_Efik']
cfd = nltk.ConditionalFreqDist(
    (lang, len(word))
    for lang in languages
    for word in udhr.words(lang + '-Latin1')
)
cfd.plot(cumulative=True)
# Three access granularities for one Gutenberg text: raw characters,
# word tokens, and sentences (lists of word tokens).
raw = gutenberg.raw("burgess-busterbrown.txt")
print(raw[1:20])
words = gutenberg.words("burgess-busterbrown.txt")
print(words[1:20])
'''['The', 'Adventures', 'of', 'Buster', ..., 'Bear']'''
sents = gutenberg.sents("burgess-busterbrown.txt")
print(sents[1:20])
'''[['I'], ['BUSTER', 'BEAR', 'GOES', 'FISHING'], ..., 'for', 'breakfast', '.']]'''
# --- Loading your own plain-text corpus -------------------------------------
from nltk.corpus import PlaintextCorpusReader
# Root directory holding the user's own .txt files.
corpus_root = 'D:/tmp/tensorflow/data'
# BUG FIX: the fileid pattern is a regular expression, and 'my*\.txt' means
# "m, then zero or more y, then .txt" -- it could never match 'mya.txt',
# which is requested below.  Use a raw string (so '\.' is not an invalid
# string escape) and '.*' to match any characters after the 'my' prefix.
wordlists = PlaintextCorpusReader(corpus_root, r'my.*\.txt')
print(wordlists.fileids())
print(wordlists.readme())
print(wordlists.words('mya.txt'))
# --- Conditional frequency distributions ------------------------------------
# A CFD is built from (condition, event) pairs, e.g. (genre, word).
text = ['The', 'Fulton', 'County', 'Grand', 'Jury', 'said']
pairs = [('news', 'The'), ('news', 'Fulton'), ('news', 'County')]
from nltk.corpus import brown
cfd = nltk.ConditionalFreqDist(
    (genre, word)
    for genre in brown.categories()
    for word in brown.words(categories=genre)
)
# Materialise the (genre, word) pairs for just two genres and inspect them.
genre_word = [(genre, word)
              for genre in ['news', 'romance']
              for word in brown.words(categories=genre)]
print(len(genre_word))
print(genre_word[:4])
print(genre_word[-4:])
cfd = nltk.ConditionalFreqDist(genre_word)
print(cfd)
print(cfd.conditions())
# Indexing a CFD by a condition yields that condition's FreqDist.
print(cfd['news'])
print(cfd['romance'])
print(list(cfd['romance']))
print(cfd['romance']['could'])
# Same inaugural-address CFD as earlier, built but not plotted this time.
from nltk.corpus import inaugural
cfd = nltk.ConditionalFreqDist(
    (target, fileid[:4])
    for fileid in inaugural.fileids()
    for w in inaugural.words(fileid)
    for target in ['america', 'citizen']
    if w.lower().startswith(target)
)
# Word-length CFD over several languages; tabulate cumulative counts of
# word lengths 0-9 for two of them.
from nltk.corpus import udhr
languages = ['Chickasaw', 'English', 'German_Deutsch', 'Greenlandic_Inuktikut', 'Hungarian_Magyar', 'Ibibio_Efik']
cfd = nltk.ConditionalFreqDist(
    (lang, len(word))
    for lang in languages
    for word in udhr.words(lang + '-Latin1')
)
cfd.tabulate(conditions=['English', 'German_Deutsch'], samples = range(10), cumulative=True)
'''
0 1 2 3 4 5 6 7 8 9
English 0 185 525 883 997 1166 1283 1440 1558 1638
German_Deutsch 0 171 263 614 717 894 1013 1110 1213 1275
'''
# nltk.bigrams returns an iterator; list() materialises the word pairs.
sent = ['In', 'the', 'beginning', 'God', 'Created', 'the', 'heaven', 'and', 'the', 'earth', '.']
print(nltk.bigrams(sent))
print(list(nltk.bigrams(sent)))
'''
[('In', 'the'), ('the', 'beginning'), ('beginning', 'God'), ('God', 'Created'), ('Created', 'the'),
('the', 'heaven'), ('heaven', 'and'), ('and', 'the'), ('the', 'earth'), ('earth', '.')]
'''
def unusual_words(text):
    """Return the sorted vocabulary items of *text* that do not appear in
    the standard NLTK English wordlist.

    text -- an iterable of word tokens; non-alphabetic tokens are ignored
    and comparison is case-insensitive.
    """
    vocabulary = {w.lower() for w in text if w.isalpha()}
    known_english = {w.lower() for w in nltk.corpus.words.words()}
    return sorted(vocabulary - known_english)
# Words of Austen's novel / chat corpus absent from the English wordlist
# (rare words, proper nouns, inflected forms, chat spellings).
res = unusual_words(nltk.corpus.gutenberg.words('austen-sense.txt'))
print(res)
'''['abbeyland', 'abhorred', 'abilities', ..., 'yielded', 'youngest']'''
res = unusual_words(nltk.corpus.nps_chat.words())
print(res)
'''['aaaaaaaaaaaaaaaaa', 'aaahhhh', 'abortions', ..., 'zzzzzzzing', 'zzzzzzzz']'''
# --- Stopwords ---------------------------------------------------------------
from nltk.corpus import stopwords
print(stopwords.words('english'))
'''['i', 'me', 'my', 'myself', 'we', ..., 'won', 'wouldn']'''
def content_fraction(text):
    """Return the fraction of tokens in *text* that are not English stopwords.

    text -- a sequence of word tokens (must support len()); comparison is
    case-insensitive.
    Returns 0.0 for an empty sequence instead of raising ZeroDivisionError.
    """
    # Renamed local (was `stopwords`) so it no longer shadows the
    # module-level `stopwords` imported from nltk.corpus above, and
    # converted to a set for O(1) membership tests per token.
    stopword_set = set(nltk.corpus.stopwords.words('english'))
    content = [w for w in text if w.lower() not in stopword_set]
    return len(content) / len(text) if len(text) else 0.0
print(content_fraction(nltk.corpus.reuters.words()))
# Word-puzzle solver: words of length >= 6 containing the obligatory letter
# that can be spelled from 'egivrvonl'; the FreqDist comparison enforces
# per-letter multiplicity (a letter may be used only as often as supplied).
puzzle_letters = nltk.FreqDist('egivrvonl')
obligatory = 'r'
wordlist = nltk.corpus.words.words()
res = [w for w in wordlist if len(w) >= 6
       and obligatory in w
       and nltk.FreqDist(w) <= puzzle_letters]
print(res)
'''['glover', 'gorlin', 'govern', 'grovel', 'ignore', ..., 'violer', 'virole']'''
# --- Names corpus: first names used for both genders ------------------------
names = nltk.corpus.names
print(names.fileids())
male_names = names.words('male.txt')
female_names = names.words('female.txt')
res = [w for w in male_names if w in female_names]
print(res)
'''['Abbey', 'Abbie', 'Abby', ..., 'Winnie', 'Winny', 'Wynn']'''
# --- CMU Pronouncing Dictionary ---------------------------------------------
# Each entry is (word, list-of-ARPAbet-phones); vowels carry stress digits.
entries = nltk.corpus.cmudict.entries()
print(len(entries))
for entry in entries[39943:39951]:
    print(entry)
'''
('explorer', ['IH0', 'K', 'S', 'P', 'L', 'AO1', 'R', 'ER0'])
('explorers', ['IH0', 'K', 'S', 'P', 'L', 'AO1', 'R', 'ER0', 'Z'])
('explores', ['IH0', 'K', 'S', 'P', 'L', 'AO1', 'R', 'Z'])
('exploring', ['IH0', 'K', 'S', 'P', 'L', 'AO1', 'R', 'IH0', 'NG'])
('explosion', ['IH0', 'K', 'S', 'P', 'L', 'OW1', 'ZH', 'AH0', 'N'])
('explosions', ['IH0', 'K', 'S', 'P', 'L', 'OW1', 'ZH', 'AH0', 'N', 'Z'])
('explosive', ['IH0', 'K', 'S', 'P', 'L', 'OW1', 'S', 'IH0', 'V'])
('explosively', ['EH2', 'K', 'S', 'P', 'L', 'OW1', 'S', 'IH0', 'V', 'L', 'IY0'])
'''
# Three-phone words pronounced P-?-T; print each word with its middle phone.
for word, pron in entries:
    if len(pron) == 3:
        ph1, ph2, ph3 = pron
        if ph1 == 'P' and ph3 == 'T':
            print(word, ph2)
'''
pait EY1
pat AE1
...
put UH1
putt AH1
'''
# Words whose pronunciation ends with the syllable /N IH0 K S/ ("-nicks").
# BUG FIX: the phone was mistyped as 'IHO' (letter O) instead of 'IH0'
# (digit zero), so the comparison could never match and the original run
# printed [].  With the digit the search finds the expected words.
syllable = ['N', 'IH0', 'K', 'S']
res = [word for word, pron in entries if pron[-4:] == syllable]
print(res)
'''["atlantic's", 'audiotronics', 'avionics', ..., 'tectonics', 'tonics']'''
# Words written with a final 'n' but pronounced ending in /M/ (silent n).
res = [w for w, pron in entries if pron[-1] == 'M' and w[-1] == 'n']
print(res)
'''['autumn', 'column', 'condemn', 'damn', 'goddamn', 'hymn', 'solemn']'''
# Initial two-letter spellings whose first phone is /N/ but whose spelling
# does not start with 'n' (silent first letters).
res = sorted(set(w[:2] for w, pron in entries if pron[0] == 'N' and w[0] != 'n'))
print(res)
'''['gn', 'kn', 'mn', 'pn']'''
def stress(pron):
    """Extract the stress digits from a CMU pronunciation.

    pron -- a list of ARPAbet phone strings, e.g. ['IH0', 'K', 'S', ...];
    vowel phones end in a stress digit ('0', '1' or '2').
    Returns the digit characters in order of appearance.
    """
    digits = []
    for phone in pron:
        for symbol in phone:
            if symbol.isdigit():
                digits.append(symbol)
    return digits
# Words matching particular stress patterns over their vowels.
res = [w for w, pron in entries if stress(pron) == ['0', '1', '0', '2', '0']]
print(res)
'''['abbreviated', 'abbreviated', 'abbreviating', ..., 'vocabulary', 'voluntarism']'''
res = [w for w, pron in entries if stress(pron) == ['0', '2', '0', '1', '0']]
print(res)
'''['abbreviation', 'abbreviations', 'abomination', ..., 'wakabayashi', 'yekaterinburg']'''
# Group 3-phone P-initial words by their first and last phone; show the
# templates with more than 10 entries (minimal-pair style sets).
p3 = [(pron[0] + '-' + pron[2], word)
      for (word, pron) in entries
      if pron[0] == 'P' and len(pron) == 3]
cfd = nltk.ConditionalFreqDist(p3)
for template in cfd.conditions():
    if len(cfd[template]) > 10:
        words = cfd[template].keys()
        wordlist = ' '.join(words)
        print(template, wordlist[:70] + "...")
'''
P-P paap paape pap pape papp paup peep pep pip pipe pipp poop pop pope pop...
P-R paar pair par pare parr pear peer pier poor poore por pore porr pour...
P-K pac pack paek paik pak pake paque peak peake pech peck peek perc perk ...
P-S pace pass pasts peace pearse pease perce pers perse pesce piece piss p...
P-L pahl pail paille pal pale pall paul paule paull peal peale pearl pearl...
P-N paign pain paine pan pane pawn payne peine pen penh penn pin pine pinn...
P-Z pais paiz pao's pas pause paws pays paz peas pease pei's perz pez pies...
P-T pait pat pate patt peart peat peet peete pert pet pete pett piet piett...
P-CH patch pautsch peach perch petsch petsche piche piech pietsch pitch pit...
P-UW1 peru peugh pew plew plue prew pru prue prugh pshew pugh...
'''
# Dictionary-style access to cmudict; assigning a new key only extends the
# local dict, it does not modify the corpus on disk.
prondict = nltk.corpus.cmudict.dict()
print(prondict['fire'])
prondict['blog'] = [['B', 'L', 'AA1', 'G']]
print(prondict['blog'])
# Text-to-phones lookup using the first listed pronunciation of each word.
text = ['natural', 'language', 'processing']
res = [ph for w in text for ph in prondict[w][0]]
print(res)
'''
['N', 'AE1', 'CH', 'ER0', 'AH0', 'L', 'L', 'AE1', 'NG', 'G', 'W', 'AH0', 'JH', 'P',
'R', 'AA1', 'S', 'EH0', 'S', 'IH0', 'NG']
'''
# --- Swadesh comparative wordlists ------------------------------------------
from nltk.corpus import swadesh
print(swadesh.fileids())
'''
['be', 'bg', 'bs', 'ca', 'cs', 'cu', 'de', 'en', 'es', 'fr', 'hr', 'it', 'la', 'mk', 'nl',
'pl', 'pt', 'ro', 'ru', 'sk', 'sl', 'sr', 'sw', 'uk']
'''
print(swadesh.words('en'))
'''['I', 'you (singular), thou', ..., 'if', 'because', 'name']'''
# entries() pairs words up across languages -> a simple translation dict.
fr2en = swadesh.entries(['fr', 'en'])
print(fr2en)
'''[('je', 'I'), ('tu, vous', 'you (singular), thou'), ..., ('parce que', 'because'), ('nom', 'name')]'''
translate = dict(fr2en)
print(translate['chien'])
print(translate['jeter'])
# Extend the same dict with German->English and Spanish->English pairs.
de2en = swadesh.entries(['de', 'en'])
es2en = swadesh.entries(['es', 'en'])
translate.update(dict(de2en))
translate.update(dict(es2en))
print(translate['Hund'])
print(translate['perro'])
# Compare cognate rows across several languages at once.
languages = ['en', 'de', 'nl', 'es', 'fr', 'pt', 'la']
for i in [139, 140, 141, 142]:
    print(swadesh.entries(languages)[i])
'''
('say', 'sagen', 'zeggen', 'decir', 'dire', 'dizer', 'dicere')
('sing', 'singen', 'zingen', 'cantar', 'chanter', 'cantar', 'canere')
('play', 'spielen', 'spelen', 'jugar', 'jouer', 'jogar, brincar', 'ludere')
('float', 'schweben', 'zweven', 'flotar', 'flotter', 'flutuar, boiar', 'fluctuare')
'''
# --- Toolbox lexicon files ---------------------------------------------------
from nltk.corpus import toolbox
print(toolbox.entries('rotokas.dic'))
'''
[('kaa', [('ps', 'V'), ('pt', 'A'), ..., ('tkp', 'laplap'), ('dt', '28/Jul/2004')])]
'''
# --- WordNet -----------------------------------------------------------------
from nltk.corpus import wordnet as wn
# Synsets, lemma names, glosses and example sentences for 'motorcar'/'car'.
print(wn.synsets('motorcar'))
print(wn.synset('car.n.01').lemma_names())
print(wn.synset('car.n.01').definition())
print(wn.synset('car.n.01').examples())
print(wn.synset('car.n.01').lemmas())
'''
[Lemma('car.n.01.car'), Lemma('car.n.01.auto'), Lemma('car.n.01.automobile'),
Lemma('car.n.01.machine'), Lemma('car.n.01.motorcar')]
'''
# A lemma belongs to a synset and has a name; a word maps to many synsets.
print(wn.lemma('car.n.01.automobile'))
print(wn.lemma('car.n.01.automobile').synset())
print(wn.lemma('car.n.01.automobile').name())
print(wn.synsets('car'))
'''[Synset('car.n.01'), Synset('car.n.02'), Synset('car.n.03'), Synset('car.n.04'), Synset('cable_car.n.01')]'''
for synset in wn.synsets('car'):
    print(synset.lemma_names())
'''
['car', 'auto', 'automobile', 'machine', 'motorcar']
['car', 'railcar', 'railway_car', 'railroad_car']
['car', 'gondola']
['car', 'elevator_car']
['cable_car', 'car']
'''
print(wn.lemmas('car'))
# Hyponyms: more specific concepts below car.n.01 in the taxonomy.
motorcar = wn.synset('car.n.01')
types_of_motorcar = motorcar.hyponyms()
print(types_of_motorcar[26])
'''[Lemma('car.n.01.car'), Lemma('car.n.02.car'), Lemma('car.n.03.car'),
Lemma('car.n.04.car'), Lemma('cable_car.n.01.car')]'''
res = sorted([lemma.name() for synset in types_of_motorcar for lemma in synset.lemmas()])
print(res)
'''['Model_T', 'S.U.V.', 'SUV', 'Stanley_Steamer', ..., 'used-car', 'waggon', 'wagon']'''
# Hypernym paths from car.n.01 up to the root entity.n.01 (two paths here).
print(motorcar.hypernyms())
paths = motorcar.hypernym_paths()
print(len(paths))
res = [synset.name() for synset in paths[0]]
print(res)
'''
['entity.n.01', 'physical_entity.n.01', 'object.n.01', 'whole.n.02', 'artifact.n.01', 'instrumentality.n.03',
'container.n.01', 'wheeled_vehicle.n.01', 'self-propelled_vehicle.n.01', 'motor_vehicle.n.01', 'car.n.01']
'''
res = [synset.name() for synset in paths[1]]
print(res)
'''
['entity.n.01', 'physical_entity.n.01', 'object.n.01', 'whole.n.02', 'artifact.n.01', 'instrumentality.n.03',
'conveyance.n.03', 'vehicle.n.01', 'wheeled_vehicle.n.01', 'self-propelled_vehicle.n.01',
'motor_vehicle.n.01', 'car.n.01']
'''
print(motorcar.root_hypernyms())
# Meronyms (parts/substances of a tree) and holonyms (wholes it belongs to).
print(wn.synset('tree.n.01').part_meronyms())
'''[Synset('burl.n.02'), Synset('crown.n.07'), Synset('limb.n.02'), Synset('stump.n.01'), Synset('trunk.n.01')]'''
print(wn.synset('tree.n.01').substance_meronyms())
'''[Synset('heartwood.n.01'), Synset('sapwood.n.01')]'''
print(wn.synset('tree.n.01').member_holonyms())
'''[Synset('forest.n.01')]'''
# The noun senses of 'mint' with their dictionary glosses.
for synset in wn.synsets('mint', wn.NOUN):
    print(synset.name() + ':', synset.definition())
'''
batch.n.02: (often followed by `of') a large number or amount or extent
mint.n.02: any north temperate plant of the genus Mentha with aromatic leaves and small mauve flowers
mint.n.03: any member of the mint family of plants
mint.n.04: the leaves of a mint plant used fresh or candied
mint.n.05: a candy that is flavored with a mint oil
mint.n.06: a plant where money is coined by authority of the government
'''
print(wn.synset('mint.n.04').part_holonyms())
print(wn.synset('mint.n.04').substance_holonyms())
# Verb entailments (what doing X implies) and lemma-level antonyms.
print(wn.synset('walk.v.01').entailments())
print(wn.synset('eat.v.01').entailments())
print(wn.synset('tease.v.03').entailments())
print(wn.lemma('supply.n.02.supply').antonyms())
print(wn.lemma('rush.v.01.rush').antonyms())
print(wn.lemma('horizontal.a.01.horizontal').antonyms())
'''[Lemma('inclined.a.02.inclined'), Lemma('vertical.a.01.vertical')]'''
print(wn.lemma('staccato.r.01.staccato').antonyms())
# Semantic similarity: compare whales, a tortoise and a novel.
right = wn.synset('right_whale.n.01')
orca = wn.synset('orca.n.01')
minke = wn.synset('minke_whale.n.01')
tortoise = wn.synset('tortoise.n.01')
novel = wn.synset('novel.n.01')
# Most specific common ancestors in the hypernym hierarchy.
print(right.lowest_common_hypernyms(minke))
print(right.lowest_common_hypernyms(orca))
print(right.lowest_common_hypernyms(tortoise))
print(right.lowest_common_hypernyms(novel))
# min_depth(): how far each synset sits below the root of the hierarchy.
print(wn.synset('baleen_whale.n.01').min_depth())
print(wn.synset('whale.n.02').min_depth())
print(wn.synset('vertebrate.n.01').min_depth())
print(wn.synset('entity.n.01').min_depth())
# path_similarity(): shortest-path based score (higher = more similar).
print(right.path_similarity(minke))
print(right.path_similarity(orca))
print(right.path_similarity(tortoise))
print(right.path_similarity(novel))