text2: Sense and Sensibility by Jane Austen 1811
text3: The Book of Genesis
text4: Inaugural Address Corpus
text5: Chat Corpus
text6: Monty Python and the Holy Grail
text7: Wall Street Journal
text8: Personals Corpus
text9: The Man Who Was Thursday by G . K . Chesterton 1908
# Quick tour of the nltk.book sample texts. text1, text4 and FreqDist are
# expected to be in scope already (e.g. via `from nltk.book import *`).
print(text1.name)  # title of the text
# concordance(), similar(), common_contexts() and collocations() print their
# own results and return None, so they are called directly instead of being
# wrapped in print() (which would only add a stray "None" line).
text1.concordance("love")  # every occurrence of the word with its context
text1.similar("very")  # words that appear in similar contexts
text1.common_contexts(["pretty", "very"])  # contexts shared by both words
# Lexical dispersion plot over the Inaugural Address Corpus
text4.dispersion_plot(["citizens", "freedom", "democracy"])
text1.collocations()  # frequent word pairs
print(type(text1))
print(len(text1))  # length of the text in tokens
print(len(set(text1)))  # vocabulary size (distinct tokens)
fword = FreqDist(text1)
print(text1.name)  # title of the text
print(fword)
voc = fword.most_common(50)  # the 50 most frequent tokens
fword.plot(50, cumulative=True)  # cumulative frequency plot of the top 50
print(fword.hapaxes())  # tokens that occur only once
分词和分句
-----
from nltk.corpus import stopwords
from nltk.tokenize import word_tokenize, sent_tokenize

# Word tokenization (TreebankWordTokenizer / PunktTokenizer)
print(word_tokenize(text="All work and no play makes jack a dull boy, all work and no play", language="english"))
# Sentence tokenization
data = "All work and no play makes jack dull boy. All work and no play makes jack a dull boy."
print(sent_tokenize(data))
print(type(stopwords.words("english")))
# Build the stop-word set once: calling stopwords.words() inside the
# comprehension would re-evaluate it for every token.
english_stopwords = set(stopwords.words("english"))
# Drop the stop words from the tokenized sentence
print([w for w in word_tokenize(text="All work and no play makes jack a dull boy, all work and no play", language="english") if w not in english_stopwords])
时态 和 单复数
--------
from nltk.stem import PorterStemmer
from nltk.stem import SnowballStemmer
from nltk.stem import WordNetLemmatizer

# Porter stemmer: print each token next to its stem
data = word_tokenize(text="All work and no play makes jack a dull boy, all work and no play,playing,played", language="english")
ps = PorterStemmer()
for w in data:
    print(w, ":", ps.stem(w))

# Snowball stemmer; the result is printed so the script shows it when run
# outside an interactive session.
snowball_stemmer = SnowballStemmer("english")
print(snowball_stemmer.stem("presumably"))
# presum

# WordNet lemmatizer: reduces a plural noun to its dictionary form
wordnet_lemmatizer = WordNetLemmatizer()
print(wordnet_lemmatizer.lemmatize("dogs"))
# dog
词性标注
----
import nltk

# Two-line sample sentence. The expected token list below contains no
# ellipsis token, so the sentence is written as two plain lines.
sentence = """At eight o'clock on Thursday morning
Arthur didn't feel very good."""
tokens = nltk.word_tokenize(sentence)
print(tokens)
# ['At', 'eight', "o'clock", 'on', 'Thursday', 'morning',
#  'Arthur', 'did', "n't", 'feel', 'very', 'good', '.']
nltk.help.upenn_tagset("NNP")  # print the meaning of the NNP tag
tagged = nltk.pos_tag(tokens)
# Tag several pre-tokenized sentences in one call. pos_tag_sents is the
# NLTK 3 name for what used to be batch_pos_tag.
nltk.pos_tag_sents([["this", "is", "batch", "tag", "test"], ["nltk", "is", "text", "analysis", "tool"]])
print(tagged)
# First pairs of the expected output:
# [('At', 'IN'), ('eight', 'CD'), ("o'clock", 'JJ'), ('on', 'IN'),
#  ('Thursday', 'NNP'), ('morning', 'NN')]
附表:
![](https://pythonspot-9329.kxcdn.com/wp-content/uploads/2016/08/nltk-speech-codes.png)
分类器
---
下面列出的是NLTK中自带的分类器
from nltk.classify.api import ClassifierI, MultiClassifierI
from nltk.classify.megam import config_megam, call_megam
from nltk.classify.weka import WekaClassifier, config_weka
from nltk.classify.naivebayes import NaiveBayesClassifier
from nltk.classify.positivenaivebayes import PositiveNaiveBayesClassifier
from nltk.classify.decisiontree import DecisionTreeClassifier
from nltk.classify.rte_classify import rte_classifier, rte_features, RTEFeatureExtractor
from nltk.classify.util import accuracy, apply_features, log_likelihood
from nltk.classify.scikitlearn import SklearnClassifier
from nltk.classify.maxent import (MaxentClassifier, BinaryMaxentFeatureEncoding,TypedMaxentFeatureEncoding,ConditionalExponentialClassifier)
### 应用1:通过名字预测性别
from nltk.corpus import names
# The only feature used is the last letter of the name
def gender_features(word):
    """Build the feature dict for one name.

    Args:
        word: The name to extract features from; must be non-empty.

    Returns:
        A dict with the single key ``'last_letter'`` mapped to the final
        character of ``word``.

    Raises:
        IndexError: If ``word`` is empty.
    """
    return {'last_letter': word[-1]}
import random

import nltk
from nltk import classify

# Data preparation: label every name from the two corpus files
name = [(n, "male") for n in names.words("male.txt")] + [(n, "female") for n in names.words("female.txt")]
print(len(name))
# The list is ordered male-then-female, so without shuffling the held-out
# tail slice would be drawn only from the end of the female names.
random.shuffle(name)
# Feature extraction and model training (first 6000 examples)
features = [(gender_features(n), g) for (n, g) in name]
classifier = nltk.NaiveBayesClassifier.train(features[:6000])
# Try the classifier on a single name
print(classifier.classify(gender_features("Frank")))
# Accuracy on the remaining examples
print(classify.accuracy(classifier, features[6000:]))
### 应用2:情感分析
import nltk.classify.util
from nltk.classify import NaiveBayesClassifier
from nltk.corpus import names
文末
好了,今天的分享就到这里。如果你对面试中遇到的问题感到困惑,或者刚毕业、工作了几年却仍然迷茫,不知道该如何准备面试、突破现状、提升自己,对自己的未来不够了解、不知道该如何规划,可以来看看同行们是如何突破现状、如何学习的,吸收他们的面试和工作经验,完善自己之后的面试计划及职业规划。
这里放上一部分我工作以来以及参与过的大大小小的面试收集总结出来的相关的几十套腾讯、头条、阿里、美团等公司21年的面试专题,其中把技术点整理成了视频和PDF(实际上比预期多花了不少精力),包含知识脉络 + 诸多细节,由于篇幅有限,这里以图片的形式给大家展示一部分免费分享给大家,主要还是希望大家在如今大环境不好的情况下面试能够顺利一点,希望可以帮助到大家~
还有 高级架构技术进阶脑图、Android开发面试专题资料,高级进阶架构资料 帮助大家学习提升进阶,也节省大家在网上搜索资料的时间来学习,也可以分享给身边好友一起学习。
【Android核心高级技术PDF文档,BAT大厂面试真题解析】
【延伸Android必备知识点】
这里只是整理出来的部分面试题,后续会持续更新,希望通过这些高级面试题能够降低面试Android岗位的门槛,让更多的Android工程师理解Android系统,掌握Android系统。喜欢的话麻烦点个喜欢,再关注一下~
【Android核心高级技术PDF文档,BAT大厂面试真题解析】
[外链图片转存中…(img-J4JFvrXK-1720103455679)]
【延伸Android必备知识点】
[外链图片转存中…(img-VvjfQJpk-1720103455680)]
这里只是整理出来的部分面试题,后续会持续更新,希望通过这些高级面试题能够降低面试Android岗位的门槛,让更多的Android工程师理解Android系统,掌握Android系统。喜欢的话麻烦点个喜欢,再关注一下~