思路:
- 词向量:
- 将词向量中值最大的维度的下标作为该词向量的标签。
- 采用t-SNE将词向量压缩到2维空间,然后画成散点图。点的颜色表示该词向量的标签。
- 类向量:
- 类向量指的是在分类任务中,样本在输入softmax之前的向量。
- 直接采用t-SNE将类向量压缩到2维空间,然后画出散点图。点的颜色表示该类向量的标签。
例子:
from matplotlib.backends.backend_pdf import PdfPages
from sklearn.manifold import TSNE
from matplotlib import pyplot as plt
import numpy as np
# Example data: assume we have 50 word vectors, each 15-dimensional
# (one row per word vector, one column per dimension).
word_embedding = \
np.array([[ -9.26847000e-02, 2.65970000e-01, 3.29432000e-01,
-7.39718000e-02, -6.73928000e-02, -6.35561000e-02,
-9.60802000e-02, 3.04700000e-01, -6.34060000e-02,
-8.69368000e-02, 3.15142000e-01, 3.25048000e-01,
3.43239000e-01, 3.20784000e-01, 3.10090000e-01],
[ -4.08858000e-03, 3.86324000e-01, 4.39724000e-01,
1.41515000e-02, 2.05663000e-02, 3.34793000e-02,
2.29018000e-03, 4.27724000e-01, 2.35102000e-02,
5.34813000e-02, 4.69541000e-01, 4.56828000e-01,
4.67765000e-01, 4.43996000e-01, 4.19499000e-01],
[ -5.20866000e-02, 2.24091000e-01, 2.78965000e-01,
-6.02106000e-02, -4.93041000e-02, -4.81996000e-02,
-5.83473000e-02, 2.58325000e-01, -4.49707000e-02,
-5.12397000e-02, 2.65172000e-01, 2.76636000e-01,
2.64820000e-01, 2.66801000e-01, 2.78434000e-01],
[ 3.01420000e-01, 4.53456000e-01, 5.62255000e-01,
3.11544000e-01, 3.51967000e-01, 3.50391000e-01,
3.35528000e-01, 5.39958000e-01, 2.87258000e-01,
3.27332000e-01, 5.35088000e-01, 5.41420000e-01,
5.60140000e-01, 5.29896000e-01, 5.53655000e-01],
[ -3.34516000e-02, 2.78522000e-01, 3.37687000e-01,
-4.85574000e-02, -3.37070000e-02, -2.66458000e-02,
-2.92930000e-02, 3.31487000e-01, -1.53911000e-02,
-5.19083000e-02, 3.33022000e-01, 3.41722000e-01,
3.27839000e-01, 3.27849000e-01, 3.28789000e-01],
[ 4.43914000e-03, 2.36622000e-01, 2.76034000e-01,
-1.55010000e-04, 5.38064000e-03, -5.85116000e-03,
2.28113000e-03, 2.78247000e-01, 2.74197000e-03,
-6.24126000e-03, 2.78610000e-01, 2.80766000e-01,
2.66917000e-01, 2.85312000e-01, 2.73923000e-01],
[ 9.90938000e-02, 1.45888000e-01, 2.29572000e-01,
9.97999000e-02, 9.14779000e-02, 8.49883000e-02,
7.79318000e-02, 2.25150000e-01, 1.02579000e-01,
1.28821000e-01, 2.07060000e-01, 2.13385000e-01,
1.99332000e-01, 2.20373000e-01, 2.08831000e-01],
[ -1.86492000e-02, 2.84924000e-01, 3.22759000e-01,
-2.14725000e-02, -2.82217000e-02, -1.93443000e-02,
-2.08581000e-02, 3.39969000e-01, -1.27362000e-02,
-1.59953000e-02, 3.39407000e-01, 3.26889000e-01,
3.26662000e-01, 3.49180000e-01, 3.28859000e-01],
[ 1.83003000e-01, 4.95721000e-02, 6.46707000e-02,
1.68665000e-01, 1.95583000e-01, 1.93675000e-01,
1.67311000e-01, 5.65916000e-02, 1.59715000e-01,
1.80062000e-01, 6.36378000e-02, 6.98871000e-02,
5.53104000e-02, 8.59143000e-02, 8.49541000e-02],
[ 3.57093000e-01, -3.67925000e-02, -4.25365000e-02,
3.55424000e-01, 3.47844000e-01, 3.48721000e-01,
3.45803000e-01, -2.11651000e-02, 2.84238000e-01,
3.36385000e-01, -4.02782000e-02, -6.11450000e-02,
-2.28289000e-02, -2.58880000e-02, -4.36964000e-02],
[ 1.07747000e-01, 2.40536000e-01, 3.39735000e-01,
7.96501000e-02, 8.23634000e-02, 4.08718000e-02,
7.97752000e-02, 3.42705000e-01, 2.58616000e-02,
4.64636000e-02, 3.28200000e-01, 3.11190000e-01,
3.50902000e-01, 3.44092000e-01, 2.88163000e-01],
[ 1.96865000e-01, 1.54481000e-01, 1.73556000e-01,
2.03293000e-01, 2.04454000e-01, 2.27668000e-01,
2.29026000e-01, 1.77935000e-01, 1.91509000e-01,
2.39608000e-01, 2.07256000e-01, 1.88885000e-01,
1.90958000e-01, 1.98382000e-01, 1.93223000e-01],
[ 2.75430000e-02, 4.02585000e-01, 5.12363000e-01,
1.66888000e-02, 4.71756000e-02, 3.62513000e-02,
2.24278000e-02, 5.16299000e-01, 2.56757000e-02,
5.01242000e-02, 4.99019000e-01, 5.19048000e-01,
5.08627000e-01, 4.98673000e-01, 5.32897000e-01],
[ 1.09952000e-01, 3.68568000e-01, 4.66063000e-01,
8.67051000e-02, 1.35084000e-01, 1.05813000e-01,
1.52189000e-01, 4.54485000e-01, 8.05630000e-02,
1.16354000e-01, 4.81074000e-01, 4.34318000e-01,
4.68502000e-01, 4.56193000e-01, 4.51878000e-01],
[ 9.07705000e-01, -1.19126000e-01, -1.94586000e-01,
9.69742000e-01, 1.05544000e+00, 1.00079000e+00,
1.07812000e+00, -1.24645000e-01, 7.67645000e-01,
9.83039000e-01, -1.45237000e-01, -1.23262000e-01,
-1.80877000e-01, -1.68868000e-01, -1.31907000e-01],
[ 5.47304000e-02, 3.41735000e-01, 3.97924000e-01,
4.87275000e-02, 6.64988000e-02, 5.46800000e-02,
5.80973000e-02, 3.95630000e-01, 4.80677000e-02,
4.67798000e-02, 4.04524000e-01, 4.08717000e-01,
3.85432000e-01, 3.94440000e-01, 4.01805000e-01],
[ 1.87155000e-01, 3.79954000e-01, 4.83464000e-01,
2.13328000e-01, 2.56143000e-01, 1.99511000e-01,
2.31228000e-01, 4.62363000e-01, 1.79116000e-01,
1.93377000e-01, 4.76625000e-01, 4.80363000e-01,
4.68738000e-01, 4.96291000e-01, 4.62237000e-01],
[ 1.10135000e+00, 2.21877000e-01, 1.43174000e-01,
1.19311000e+00, 1.16273000e+00, 1.19129000e+00,
1.20193000e+00, 2.46340000e-01, 9.00804000e-01,
1.21326000e+00, 2.55334000e-01, 2.85639000e-01,
1.75352000e-01, 2.92858000e-01, 2.13424000e-01],
[ 1.24295000e-02, 2.05530000e-01, 2.35478000e-01,
1.06360000e-02, 1.25000000e-02, 1.39434000e-02,
6.19534000e-03, 2.32923000e-01, 2.25262000e-02,
1.63384000e-02, 2.49811000e-01, 2.45686000e-01,
2.39310000e-01, 2.44092000e-01, 2.49588000e-01],
[ 4.90046000e-02, 1.98349000e-01, 2.44335000e-01,
2.97860000e-02, 6.89736000e-02, 4.80883000e-02,
2.78426000e-02, 2.54369000e-01, 2.66232000e-02,
2.74745000e-02, 1.81589000e-01, 2.40744000e-01,
2.53391000e-01, 2.07375000e-01, 1.91917000e-01],
[ 4.16505000e-01, -7.59996000e-02, -1.09354000e-01,
4.26273000e-01, 4.31533000e-01, 4.29690000e-01,
4.25322000e-01, -1.01641000e-01, 3.21525000e-01,
4.01477000e-01, -9.70148000e-02, -1.15661000e-01,
-1.03311000e-01, -9.44064000e-02, -1.10988000e-01],
[ 1.64333000e-01, 1.08984000e-01, 1.92483000e-01,
1.67026000e-01, 1.98486000e-01, 1.30462000e-01,
1.68726000e-01, 1.62928000e-01, 1.57523000e-01,
1.97097000e-01, 1.78548000e-01, 1.51745000e-01,
1.84647000e-01, 1.49899000e-01, 1.59358000e-01],
[ -2.80124000e-01, 4.47964000e-01, 4.65301000e-01,
-3.12828000e-01, -3.04826000e-01, -3.28779000e-01,
-3.35304000e-01, 4.47377000e-01, -2.65617000e-01,
-2.91028000e-01, 4.60513000e-01, 4.65122000e-01,
4.68598000e-01, 4.98790000e-01, 5.08062000e-01],
[ -2.86070000e-02, 3.33408000e-01, 3.75448000e-01,
-2.74869000e-02, -2.43182000e-02, -3.28590000e-02,
-5.06823000e-02, 3.88905000e-01, -1.90741000e-02,
-2.09121000e-02, 4.09026000e-01, 3.76590000e-01,
4.20844000e-01, 4.02346000e-01, 4.01546000e-01],
[ -5.26256000e-02, 2.24553000e-01, 2.72307000e-01,
-6.59182000e-02, -5.10974000e-02, -5.03741000e-02,
-5.21351000e-02, 2.65349000e-01, -3.83339000e-02,
-6.08046000e-02, 2.80434000e-01, 2.74862000e-01,
2.72504000e-01, 2.74584000e-01, 2.68554000e-01],
[ 1.74568000e-01, 1.74784000e-01, 2.43011000e-01,
1.63657000e-01, 1.68196000e-01, 2.27396000e-01,
1.61371000e-01, 2.33325000e-01, 1.29902000e-01,
1.61663000e-01, 2.53071000e-01, 2.37139000e-01,
2.63988000e-01, 2.24518000e-01, 2.45118000e-01],
[ 1.16255000e-02, 2.70476000e-01, 3.56054000e-01,
4.01313000e-02, 5.06687000e-02, 1.23457000e-02,
1.54674000e-02, 3.49423000e-01, 1.75948000e-02,
3.63213000e-02, 3.41615000e-01, 3.45635000e-01,
3.60263000e-01, 3.62238000e-01, 3.64441000e-01],
[ -1.73951000e-02, 1.13664000e-01, 1.11284000e-01,
6.49500000e-02, 7.21068000e-02, 1.08015000e-01,
8.74592000e-02, 3.03161000e-01, 2.90478000e-02,
5.41663000e-02, 3.15965000e-01, 2.93228000e-01,
7.86849000e-02, 2.84163000e-01, 2.84195000e-01],
[ 2.54997000e-01, -1.63147000e-03, -1.65853000e-03,
2.66901000e-01, 2.66949000e-01, 2.63034000e-01,
2.64883000e-01, -1.57527000e-02, 2.29119000e-01,
2.58299000e-01, -8.23761000e-03, -7.07062000e-03,
-2.50627000e-02, -1.55307000e-02, -1.21995000e-02],
[ 1.01620000e+00, 2.76538000e-01, 3.51427000e-01,
1.09238000e+00, 1.09250000e+00, 1.05259000e+00,
1.05301000e+00, 3.97067000e-01, 8.68429000e-01,
1.07596000e+00, 3.62113000e-01, 3.74292000e-01,
3.19586000e-01, 4.03213000e-01, 3.68771000e-01],
[ -1.63824000e-01, 4.41900000e-01, 5.36594000e-01,
-1.86510000e-01, -1.65381000e-01, -2.02298000e-01,
-1.81492000e-01, 5.48222000e-01, -1.36803000e-01,
-1.74517000e-01, 5.60032000e-01, 5.66377000e-01,
5.51512000e-01, 5.52923000e-01, 5.57018000e-01],
[ 2.98013000e-01, -7.65028000e-03, -2.32811000e-02,
3.07960000e-01, 3.12602000e-01, 3.06633000e-01,
3.16460000e-01, -1.36456000e-02, 2.46844000e-01,
3.04799000e-01, -1.25610000e-02, -9.20712000e-03,
-8.56796000e-03, 1.98607000e-03, 3.82157000e-03],
[ 3.65859000e-01, -7.48346000e-02, -1.15064000e-01,
4.00183000e-01, 3.82936000e-01, 3.82595000e-01,
3.76927000e-01, -8.22186000e-02, 2.96645000e-01,
3.88128000e-01, -7.99636000e-02, -9.87358000e-02,
-1.12466000e-01, -9.98273000e-02, -9.68927000e-02],
[ -9.37148000e-03, 3.60113000e-01, 4.43448000e-01,
-7.91685000e-03, -1.44240000e-02, 3.28721000e-02,
-5.84508000e-04, 4.32181000e-01, -6.86684000e-03,
7.33296000e-03, 4.15355000e-01, 4.22542000e-01,
4.09812000e-01, 4.63949000e-01, 4.59877000e-01],
[ 5.99695000e-01, 5.82567000e-03, 2.00375000e-02,
6.23339000e-01, 6.27463000e-01, 6.18225000e-01,
6.46408000e-01, 2.40243000e-02, 5.03365000e-01,
6.37536000e-01, 1.70065000e-02, 1.07638000e-02,
7.66832000e-03, 3.15754000e-02, 8.23876000e-05],
[ -3.07969000e-04, 2.74125000e-01, 3.15739000e-01,
-2.54004000e-02, -4.83835000e-02, -2.77834000e-02,
-2.86206000e-02, 3.27560000e-01, -6.99667000e-03,
-2.38075000e-02, 3.42171000e-01, 3.46789000e-01,
3.26242000e-01, 3.04439000e-01, 3.53769000e-01],
[ 7.28510000e-02, 2.55890000e-01, 2.80280000e-01,
8.03866000e-02, 9.69812000e-02, 8.51930000e-02,
8.05826000e-02, 3.15382000e-01, 6.11170000e-02,
6.85091000e-02, 3.21162000e-01, 2.85870000e-01,
2.89914000e-01, 2.95880000e-01, 2.76949000e-01],
[ 7.77194000e-02, 3.62860000e-01, 4.45740000e-01,
8.73820000e-02, 8.70558000e-02, 1.08332000e-01,
1.21860000e-01, 4.53079000e-01, 7.65831000e-02,
8.84698000e-02, 4.22669000e-01, 4.40995000e-01,
4.64719000e-01, 4.73536000e-01, 4.35081000e-01],
[ 3.53605000e-01, 9.97475000e-02, 1.29357000e-01,
3.82858000e-01, 3.54827000e-01, 3.71830000e-01,
3.73910000e-01, 1.22202000e-01, 2.88347000e-01,
3.54923000e-01, 1.35949000e-01, 1.20212000e-01,
1.10296000e-01, 1.00215000e-01, 9.97603000e-02],
[ -1.50924000e-02, 2.63741000e-01, 3.05744000e-01,
-6.60809000e-03, 8.13357000e-03, -1.22735000e-01,
7.88794000e-03, 3.06299000e-01, -2.52105000e-04,
6.39410000e-03, 2.87691000e-01, 3.58888000e-01,
3.39875000e-01, 3.63744000e-01, 3.67565000e-01],
[ -2.98269000e-02, 2.88764000e-01, 3.49127000e-01,
-2.34874000e-02, -1.09503000e-02, -2.99691000e-02,
-2.59696000e-02, 3.57171000e-01, -1.65084000e-02,
-2.04130000e-02, 3.61638000e-01, 3.39184000e-01,
3.56305000e-01, 3.27946000e-01, 3.55257000e-01],
[ -3.63559000e-02, 4.01041000e-01, 4.70182000e-01,
-2.43535000e-02, -3.15568000e-02, -4.57663000e-02,
-5.28870000e-02, 5.26694000e-01, -1.30382000e-02,
-5.82190000e-02, 5.17938000e-01, 5.04228000e-01,
5.22251000e-01, 5.09747000e-01, 5.25493000e-01],
[ 4.06625000e-01, 5.88169000e-02, 5.23246000e-02,
4.57369000e-01, 4.34374000e-01, 4.52388000e-01,
4.53600000e-01, 5.34897000e-02, 3.31688000e-01,
4.35773000e-01, 5.65922000e-02, 6.36120000e-02,
5.00163000e-02, 6.17278000e-02, 4.91210000e-02],
[ -2.67663000e-01, 5.17257000e-01, 5.58155000e-01,
-2.75936000e-01, -2.87685000e-01, -3.11489000e-01,
-3.12920000e-01, 6.09635000e-01, -2.23530000e-01,
-2.61998000e-01, 6.39847000e-01, 6.07620000e-01,
6.06197000e-01, 5.98449000e-01, 6.15610000e-01],
[ 3.68540000e-02, 5.17398000e-01, 6.12172000e-01,
3.21888000e-02, 9.60634000e-03, 8.27719000e-04,
1.60307000e-02, 6.25721000e-01, 4.01135000e-03,
1.75771000e-02, 6.37388000e-01, 6.36084000e-01,
6.29151000e-01, 6.34905000e-01, 6.04959000e-01],
[ 2.08616000e-01, 3.98010000e-01, 4.84163000e-01,
2.15345000e-01, 2.31141000e-01, 2.32099000e-01,
2.33411000e-01, 4.88344000e-01, 1.65377000e-01,
2.28438000e-01, 4.74110000e-01, 5.02621000e-01,
4.92219000e-01, 4.77340000e-01, 5.08377000e-01],
[ -3.22720000e-03, 6.78466000e-01, 8.35841000e-01,
-6.14328000e-03, 4.60543000e-04, 4.98184000e-04,
2.78292000e-03, 8.45752000e-01, -8.08344000e-03,
4.74270000e-03, 9.34708000e-01, 8.63915000e-01,
8.84651000e-01, 8.71871000e-01, 8.65056000e-01],
[ 3.21129000e-02, 2.64005000e-01, 3.21095000e-01,
9.35817000e-03, 2.49811000e-02, 2.85817000e-02,
2.13276000e-02, 3.36613000e-01, 3.53961000e-02,
3.41373000e-03, 3.32283000e-01, 3.26185000e-01,
3.28905000e-01, 3.34830000e-01, 3.29323000e-01],
[ 1.07901000e-01, 1.37530000e-01, 2.03443000e-01,
1.15241000e-01, 1.10851000e-01, 9.12920000e-02,
7.92964000e-02, 2.18319000e-01, 7.24521000e-02,
8.76965000e-02, 2.00012000e-01, 1.74089000e-01,
2.16369000e-01, 2.18103000e-01, 1.93736000e-01],
[ 3.12001000e-01, 1.59224000e-01, 1.71225000e-01,
3.16165000e-01, 3.48506000e-01, 3.43921000e-01,
3.31858000e-01, 1.60152000e-01, 2.62346000e-01,
3.24303000e-01, 2.07817000e-01, 1.75092000e-01,
1.99093000e-01, 1.94725000e-01, 1.61459000e-01]])
# Label each word vector with the index of its largest component.
label = [np.argmax(values) for values in word_embedding]
# ndarray copy of the labels: comparing an ndarray with a scalar is a
# well-defined element-wise operation, whereas `list == scalar` only works
# through NumPy's reflected comparison.
label_arr = np.asarray(label)

# Reduce the word vectors to 2-D with t-SNE so they can be scatter-plotted.
fea = TSNE(n_components=2).fit_transform(word_embedding)

# Scatter plot: one colour per label.
# More colour names: https://www.cnblogs.com/qianblue/p/10783261.html
cValue = ['red', 'yellow', 'green', 'blue', 'orangered', 'steelblue',
          'slateblue', 'tomato', 'peru', 'darkorange', 'deeppink', 'crimson']
cls = np.unique(label_arr)
# 2-D points grouped by label, in the same order as `cls`.
fea_num = [fea[label_arr == c] for c in cls]
for i, (c, f) in enumerate(zip(cls, fea_num)):
    # Wrap around the palette so more classes than colours cannot raise
    # IndexError (15-dimensional vectors allow up to 15 distinct labels).
    color = cValue[i % len(cValue)]
    if 0 <= c < 10:
        # Labels below 10 are drawn with '+'.  '+' is an unfilled marker:
        # matplotlib ignores `edgecolor` for it and warns, so it is not passed.
        plt.scatter(f[:, 0], f[:, 1], label=c, marker='+', c=color)
    else:
        plt.scatter(f[:, 0], f[:, 1], label=c, edgecolor='none', c=color)
# The scatter calls set `label=`; draw the legend so the colour -> label
# mapping is actually visible in the figure.
plt.legend()
plt.tight_layout()

# The context manager guarantees the PDF is finalised and closed even if
# saving raises.
with PdfPages('word_embedding_scatter.pdf') as pdf:
    pdf.savefig()
plt.show()