python安装及入门学习
python安装:http://www.jianshu.com/p/b8a703df5318
然后再安装一个pycharm就能在这个集成开发环境中写python代码了:http://www.jetbrains.com/pycharm/download/#section=windows
以上安装都是很方便的
学习python的主要目的:自己的研究课题是大数据自然语言处理这块,希望用python生态中现成的一些模型(例如第三方库gensim提供的word2vec)来完成课题。目前已经用python成功地运行了word2vec模型。现在还处于学习的起步阶段,所以想记录下自己的学习过程,同时也区分一下这门高级语言与其他语言的不同。因为我以前擅长的语言是java,现在为了研究的需要,转而来研究python。
python初步学习
下面是一段关于word2vec的示例代码:
#! -*- coding=utf-8 -*-
from gensim.models import word2vec
import logging
# 主程序
logging.basicConfig(format='%(asctime)s : %(levelname)s : %(message)s', level=logging.INFO)
sentences = word2vec.Text8Corpus(u"C:\/Users\/yaojuan2\/Desktop\/text8") #加载语料
#模型初始化
model = word2vec.Word2Vec(sentences, size=200) #训练skip-gram模型; 默认window=5
model2 = word2vec.Word2Vec("hello wrold! hello The training algorithms were originally ported from the C package", size=100, window=5, min_count=5, workers=4)
print "--------/n"
print "--------/n"
y1 = model.similarity("woman", "man")
print u"woman和man的相似度为:", y1
print "--------/n"
print "--------/n"
y2 = model.most_similar("good", topn=20) # 20个最相关的
print u"和good最相关的词有:/n"
for item in y2:
print item[0], item[1]
print "--------/n"
print "--------/n"
print ' "boy" is to "father" as "girl" is to ...? /n'
y3 = model.most_similar(['girl', 'father'], ['boy'], topn=3)
for item in y3:
print item[0], item[1]
print "--------/n"
print "--------/n"
y4 = model.doesnt_match("breakfast cereal dinner lunch".split())
print u"不合群的词:", y4
print "--------/n"
print "--------/n"
y5 = model.init_sims()
#model.save('/tmp/text8.model')
#model.save_word2vec_format('/tmp/text8.model.bin', binary=True)
#model = word2vec.Word2Vec.load_word2vec_format('/tmp/vectors.bin', binary=True)
print "--------/n"
print "--------/n"
model.most_similar(['girl', 'father'], ['boy'], topn=3)
print "--------/n"
print "--------/n"
more_examples = ["he his she", "big bigger bad", "going went being"]
for example in more_examples:
a, b, x = example.split()
predicted = model.most_similar([x, b], [a])[0][0]
print "'%s' is to '%s' as '%s' is to '%s'" % (a, b, x, predicted)
print "--------/n"
print "--------/n"
y6=model.wv['computer'] # numpy vector of a word#输出词向量
print y6