import torch
import torch.nn as nn

# batch_size = 2, seq_len = 4
words = ['I love china people', 'china people is kind']
# Define the vocabulary (word -> index)
wordDict = {'I': 0, 'love': 1, 'china': 2, 'people': 3, 'is': 4, 'kind': 5}
# Convert each sentence into a list of word indices
wordVec = [[wordDict[word] for word in line.split()] for line in words]
# Convert to a torch tensor
wordVec = torch.tensor(wordVec, dtype=torch.long)
embeddingSize = 128
embedding = nn.Embedding(len(wordDict.keys()), embeddingSize)
# wordsEmbedding has shape [batch_size, seq_len, embedding_size]
wordsEmbedding = embedding(wordVec)
print(wordsEmbedding.size())  # torch.Size([2, 4, 128])
The above is a simple word-embedding example in the spirit of word2vec (note that here the nn.Embedding weights are randomly initialized rather than pre-trained). Compared with the currently popular GPT and BERT model families, it can serve as a baseline.
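As a minimal sketch of what "serving as a baseline" could look like, the snippet below feeds the [batch_size, seq_len, embedding_size] embeddings into a tiny classifier by mean-pooling over the sequence dimension. SimpleClassifier and num_classes are illustrative assumptions, not part of the original example; it reuses wordDict, embeddingSize, and wordVec defined above.

# A minimal baseline sketch (illustrative; SimpleClassifier and
# num_classes are assumptions, not from the original example).
class SimpleClassifier(nn.Module):
    def __init__(self, vocab_size, embedding_size, num_classes):
        super().__init__()
        self.embedding = nn.Embedding(vocab_size, embedding_size)
        self.fc = nn.Linear(embedding_size, num_classes)

    def forward(self, x):        # x: [batch_size, seq_len]
        emb = self.embedding(x)  # [batch_size, seq_len, embedding_size]
        pooled = emb.mean(dim=1) # mean-pool over seq_len -> [batch_size, embedding_size]
        return self.fc(pooled)   # [batch_size, num_classes]

model = SimpleClassifier(len(wordDict), embeddingSize, num_classes=2)
logits = model(wordVec)
print(logits.size())             # torch.Size([2, 2])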