import keras
from keras.models import Sequential
from keras.layers import Dense, Dropout, Activation, Bidirectional, LSTM
from keras.optimizers import SGD, Adagrad
from keras.callbacks import TensorBoard
# Data loading: features and labels are read from text files
import numpy as np
import codecs
np.random.seed(3)  # Fix NumPy's seed so NumPy-drawn random numbers repeat across runs.


def _load_dataset(path, num_classes=5):
    """Read a labelled feature file and return ``(features, one_hot_labels)``.

    Each non-blank line is expected to look like ``<label> [v1,v2,...]``:
    an integer class label, a single space, then a bracketed,
    comma-separated numeric vector (the original notes describe the
    vectors as 68-dimensional).

    Args:
        path: Path of the UTF-8 encoded text file to read.
        num_classes: Number of classes used for the one-hot label encoding.

    Returns:
        A tuple ``(features, labels)``. ``features`` is a NumPy array with
        one row per parsed line; ``labels`` is the result of
        ``keras.utils.to_categorical`` applied to the integer labels.

    Raises:
        ValueError: If a line has no space separator, or if a label or a
            vector entry is not a valid number.
    """
    features = []
    labels = []
    # The context manager closes the file even when a line fails to parse.
    with codecs.open(path, 'r', 'utf-8') as data_file:
        for line in data_file:
            # Tolerate blank lines (typically a trailing newline at EOF).
            if not line.strip():
                continue
            label_text, vector_text = line.split(" ", 1)
            tokens = vector_text.replace("[", "").replace("]", "").split(",")
            # NOTE: entries are parsed with float() rather than eval(); eval()
            # would execute arbitrary expressions found in the data file.
            # Building a real list (not a lazy map object) keeps np.array()
            # producing a numeric matrix on Python 3 as well.
            features.append([float(token) for token in tokens])
            # Each label is wrapped in a list so the label array has one
            # column, as before.
            labels.append([int(label_text)])
    one_hot = keras.utils.to_categorical(np.array(labels), num_classes=num_classes)
    return np.array(features), one_hot


# Training set.
x_train, y_train = _load_dataset("xlj_blc.txt")
# Parenthesised single-argument print behaves the same on Python 2 and 3.
print(x_train)
print(y_train)

# Test set (also used as validation data during training).
x_test, y_test = _load_dataset("csj_blc.txt")
print(x_test)
print(y_test)
# x_train = np.random.random((10, 10)) #20 dimensions, 1000 samples #1000 one-dimensional samples, 10 classes
# y_train = keras.utils.to_categorical(np.random.randint(3, size=(10, 1)), num_classes=3) #10 classes, 1000 ten-dimensional samples
# x_test = np.random.random((10, 10))
# y_test = keras.utils.to_categorical(np.random.randint(3, size=(10, 1)), num_classes=3)
# x_train= np.array([[1.9629345734914143,3.196568012237549,0.23380190134048462],[-1.5588303009668987,0.39092262585957843,0.4524924159049988],[0.326497220993042,-5.757664465904236,1.7457123756408692],[0.326497220993042,-5.757664465904236,1.7457123756408692]])
# y_train=np.array([[1,0,0],[0,1,0],[0,0,1],[0,0,1]])
# x_test= np.array([[1.9629345734914143,3.196568012237549,0.23380190134048462],[-1.5588303009668987,0.39092262585957843,0.4524924159049988],[0.326497220993042,-5.757664465904236,1.7457123756408692],[0.326497220993042,-5.757664465904236,1.7457123756408692]])
# y_test=np.array([[1,0,0],[0,1,0],[0,0,1],[0,0,1]])
# Create the model.
model = Sequential()

# Spot-check one row from each data array before the layers are added.
for data, row in ((x_train, 76), (y_train, 78), (x_test, 35), (y_test, 97)):
    print(data[row])

# Fully-connected classifier. The first layer must declare the input size
# (68 features per sample); Dropout(0.7) drops 70% of the units during
# training to limit over-fitting; the final softmax layer emits one score
# per class (5 classes), each in [0, 1], usable as predicted probabilities.
for layer in (
    Dense(128, activation='relu', input_dim=68),
    Dropout(0.7),
    Dense(64, activation='relu'),
    Dropout(0.7),
    # Disabled extra hidden layer:
    # Dense(16, activation='tanh'),
    # Dropout(0.2),
    Dense(5, activation='softmax'),
):
    model.add(layer)

# Disabled alternative: bidirectional LSTM classifier.
# model = Sequential()
# model.add(Bidirectional(LSTM(10, return_sequences=True), input_shape=(3187,68)))
# model.add(Bidirectional(LSTM(10)))
# model.add(Dense(5))
# model.add(Activation('softmax'))

model.summary()
# Choose the loss function and the optimizer.
# Disabled alternative optimizer:
# sgd = SGD(lr=0.01, decay=0.0, momentum=0.0, nesterov=False)
adagrad = Adagrad(0.01)  # Adagrad optimizer; 0.01 is its first positional argument (the learning rate).

# Categorical cross-entropy pairs with the one-hot labels; accuracy is
# reported alongside the loss.
model.compile(
    optimizer=adagrad,
    loss='categorical_crossentropy',
    metrics=['accuracy'],
)

# Writes training logs for TensorBoard to a fixed, machine-specific directory.
tb_cb = TensorBoard(log_dir="/home/ecust/hmq/lunwen/vec_similarity/logs/v28")

# Start training. The test arrays are passed as validation data, so they are
# evaluated after every epoch; the value returned by fit() is kept in `a`.
a = model.fit(
    x=x_train,
    y=y_train,
    batch_size=16,
    epochs=200,  # number of training epochs
    callbacks=[tb_cb],
    validation_data=(x_test, y_test),
)
# Loss and accuracy on the test set (disabled):
# score = model.evaluate(x_test, y_test, batch_size=128)
# print('Test loss:', score[0])
# print('Test accuracy:', score[1])

# Run the trained model over the test samples and print its raw outputs.
classes = model.predict(x_test, batch_size=32)
# Parenthesised single-argument print behaves the same on Python 2 and 3,
# and matches the other print calls in this script.
print(classes)