项目进度:
项目已接近尾声:经过训练,正确率已突破九成;通过 Bootstrap + Spring Boot 搭建的网页,可视化地展示训练过程和考生答案的聚类结果。
#!/usr/bin/python
# encoding=utf-8
# -*- coding:utf-8 -*
# 切换工作路径
import os
import sys
import csv
import pandas as pd
os.chdir(os.path.split(os.path.realpath(sys.argv[0]))[0])
import numpy
from numpy import *
import numpy as np
from mpl_toolkits.mplot3d import Axes3D
import matplotlib.pyplot as plt
from sklearn.manifold import TSNE
from sklearn.datasets import load_iris
from sklearn.decomposition import PCA
import matplotlib.pyplot as plt
class chj_data(object):
    """Minimal container pairing a feature set with its labels.

    Exposes ``data`` and ``target`` attributes, the same attribute names
    the commented-out ``iris`` usage elsewhere in this script reads.

    Args:
        data: Feature values (one entry per sample).
        target: Label values corresponding to ``data``.
    """

    def __init__(self, data, target):
        # Both values are stored as given; nothing is copied or validated.
        self.data = data
        self.target = target
def chj_load_file(fdata, ftarget):
    """Load features and labels from two text files into a ``chj_data``.

    Args:
        fdata: Path of the feature file; parsed with ``numpy.loadtxt``
            as float32.
        ftarget: Path of the label file; parsed with ``numpy.loadtxt``
            as int32.

    Returns:
        A ``chj_data`` whose ``data`` and ``target`` attributes hold the
        two loaded arrays.
    """
    # Qualify the dtypes explicitly instead of relying on the names that
    # ``from numpy import *`` happens to inject into the module namespace.
    data = numpy.loadtxt(fdata, dtype=numpy.float32)
    target = numpy.loadtxt(ftarget, dtype=numpy.int32)
    # Print both shapes as a quick sanity check of what was loaded.
    print(data.shape)
    print(target.shape)
    return chj_data(data, target)
fdata = "bert_replace50.txt"
ftarget = "input1-1-mode.csv"
# Upper bound on how many leading rows are embedded and plotted.
N_SAMPLES = 60

# The feature rows are read from the CSV export below rather than through
# chj_load_file(fdata, ftarget).
# newline='' is what the csv module expects for files handed to csv.reader;
# the with-block guarantees the handle is closed once the rows are read.
with open('bert_replace50.csv', newline='') as csv_file:
    # One list of string cells per CSV line (name ``date`` kept as-is).
    date = list(csv.reader(csv_file))

# Labels used to colour the points come from the '1p' column.
df_label = pd.read_csv(ftarget, header=0, encoding='gbk', low_memory=False)
p_list = df_label['1p'].tolist()

# Use one shared sample count so features and colours always have the same
# length, even when either file holds fewer than N_SAMPLES rows.
sample_count = min(N_SAMPLES, len(date), len(p_list))
# Convert the CSV strings to numbers up front so a non-numeric cell fails
# here, at the point of parsing, rather than inside the t-SNE call.
features = np.asarray(date[:sample_count], dtype=np.float32)
labels = p_list[:sample_count]

# Project the feature rows down to three dimensions with t-SNE.
X_tsne = TSNE(n_components=3, learning_rate=1000).fit_transform(features)

# 3D scatter of the embedding, coloured by label.
fig = plt.figure()
ax = fig.add_subplot(111, projection='3d')
ax.scatter(X_tsne[:, 0], X_tsne[:, 1], X_tsne[:, 2], c=labels)
ax.set_xlabel('X Label')
ax.set_ylabel('Y Label')
ax.set_zlabel('Z Label')
plt.show()