CODE:
#!/usr/bin/python
# -*- coding: utf-8 -*-
'''
Created on 2014-8-22
@author: guaguastd
@name: ceo_cto_bigrams.py
'''
import nltk
# Build the padded bigrams of each job title.  With pad_left/pad_right set
# and no explicit pad symbol, the padding element is None, so each title
# yields (None, first_word) ... (last_word, None).
#
# list(...) materializes the result: newer NLTK releases return a lazy
# generator from nltk.bigrams, which would print as "<generator object ...>"
# instead of the pairs.  On releases that already return a list this is a
# harmless copy.
ceo_bigrams = list(
    nltk.bigrams("Chief Executive Officer".split(), pad_right=True, pad_left=True)
)
cto_bigrams = list(
    nltk.bigrams("Chief Technology Officer".split(), pad_right=True, pad_left=True)
)

# Single-argument, parenthesized print calls produce the same output under
# the Python 2 print statement and the Python 3 print function.
# NOTE(review): the '\r' prefixes are carriage returns, not newlines; they are
# kept byte-for-byte so the emitted output is unchanged -- confirm whether
# '\n' was intended.
print('ceo_bigrams:')
print(ceo_bigrams)
print('\rcto_bigrams:')
print(cto_bigrams)

# Count the bigrams shared by both titles (set semantics: duplicates within
# one title collapse before the comparison).
shared_bigrams = set(ceo_bigrams) & set(cto_bigrams)
print('\rintersection between ceo_bigrams and cto_bigrams:')
print(len(shared_bigrams))
RESULT:
ceo_bigrams:
[(None, 'Chief'), ('Chief', 'Executive'), ('Executive', 'Officer'), ('Officer', None)]
cto_bigrams:
[(None, 'Chief'), ('Chief', 'Technology'), ('Technology', 'Officer'), ('Officer', None)]
intersection between ceo_bigrams and cto_bigrams:
2