# coding: utf-8
import random
import csv
import tensorflow as tf
from sklearn.datasets import load_boston
import matplotlib.pyplot as plt
from sklearn.preprocessing import scale
from sklearn.model_selection import train_test_split
import pandas as pd
import numpy as np
from sklearn.preprocessing import StandardScaler
# Output CSV that fit() appends prediction rows to through ``writer_csv``.
# The handle is kept open at module level because fit() writes to it later.
# newline='' lets the csv module control line endings itself (otherwise
# platforms that translate '\n' emit an extra blank line after every row).
f = open("result_78.csv", "a+", encoding='utf-8', newline='')
writer_csv = csv.writer(f)
result = []       # accumulated (Nodeid1, Nodeid2, predicted label) rows
num_classes = 2   # width of the one-hot label vectors

# Edge table: three numeric feature columns, a label column ('isBD') and the
# two node-id columns that identify each edge.
data = pd.read_csv('/home/henson/Desktop/huanping/edges.csv_EDGE_NBD12.csv',
                   encoding='gb18030')
X = np.array(data[['author_degree1', 'author_degree2', 'No']])
y = np.array(data[['isBD']])
nodeid1_all = np.array(data[['Nodeid1']])[:, 0]
nodeid2_all = np.array(data[['Nodeid2']])[:, 0]

# Split the node ids together with the features and labels: train_test_split
# shuffles the rows, so the ids must go through the same permutation to stay
# aligned with X_test (and therefore with the predictions made on it).
# NOTE(review): test_size=0.9 trains on only 10% of the rows - confirm that
# this is intended.
(X_train, X_test,
 y_train, y_test,
 nodeid1_train, nodeid1_test,
 nodeid2_train, nodeid2_test) = train_test_split(
    X, y, nodeid1_all, nodeid2_all, test_size=0.9, random_state=0)

# fit() pairs prediction i with nodeid1[i] / nodeid2[i], and predictions are
# made on X_test, so these names must refer to the test-split ids.
nodeid1 = nodeid1_test
nodeid2 = nodeid2_test

# Standardise with statistics learned from the training rows only and apply
# the very same transform to the test rows, so both splits live on one scale.
# X_train is numerically what the earlier "standardise everything, then
# re-scale the train subset" sequence produced; X_test previously stayed on
# the full-data scale and so did not match what the network was trained on.
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)
X_Standard = scaler.transform(X)  # every row, on the same scale as the splits

# Labels arrive as an (N, 1) column; flatten them and one-hot encode into
# float32 rows of width num_classes. y_test keeps the flat integer labels,
# y_test_ holds the one-hot version that is fed to the network.
y_train = y_train[:, 0]
y_test = y_test[:, 0]
y_train = (np.arange(num_classes) == y_train[:, None]).astype(np.float32)
y_test_ = (np.arange(num_classes) == y_test[:, None]).astype(np.float32)
def add_layer(inputs, input_size, output_size, activation_function=None):
    """Append one fully connected layer to the default TensorFlow graph.

    The layer owns a ``[input_size, output_size]`` weight matrix initialised
    from ``tf.random_normal`` and a ``[1, output_size]`` bias row initialised
    to 0.1. Histogram summaries are registered for both. Dropout is applied
    to ``inputs @ weights + biases`` (i.e. before the activation) using the
    module-level ``keep_prob_s`` placeholder.

    Args:
        inputs: 2-D tensor whose last dimension is ``input_size``.
        input_size: number of input features.
        output_size: number of units in this layer.
        activation_function: optional callable applied to the dropped-out
            pre-activation; when ``None`` the pre-activation is returned.

    Returns:
        The layer's output tensor.
    """
    with tf.variable_scope("Weights"):
        kernel = tf.Variable(
            tf.random_normal(shape=[input_size, output_size]),
            name="weights",
        )
        tf.summary.histogram('Weights', kernel)

    with tf.variable_scope("biases"):
        offset = tf.Variable(
            tf.zeros(shape=[1, output_size]) + 0.1,
            name="biases",
        )
        tf.summary.histogram('biases', offset)

    with tf.name_scope("Wx_plus_b"):
        pre_activation = tf.matmul(inputs, kernel) + offset

    with tf.name_scope("dropout"):
        dropped = tf.nn.dropout(pre_activation, keep_prob=keep_prob_s)

    # Linear layer requested: hand back the dropped-out pre-activation as is.
    if activation_function is None:
        return dropped

    with tf.name_scope("activation_function"):
        return activation_function(dropped)
# ---- Graph definition ------------------------------------------------------
# Placeholders: feature rows, one-hot label rows and the dropout keep
# probability (add_layer reads keep_prob_s as a module-level name).
n_features = X_train.shape[1]
xs = tf.placeholder(shape=[None, n_features], dtype=tf.float32, name="inputs")
# Label width follows num_classes so it cannot drift from the logits width.
ys = tf.placeholder(shape=[None, num_classes], dtype=tf.float32)
print(ys.shape)
keep_prob_s = tf.placeholder(dtype=tf.float32)

with tf.name_scope("layer_1"):
    # Input width is taken from the data rather than hard-coded, matching xs.
    l1 = add_layer(xs, n_features, 10, activation_function=tf.nn.relu)
with tf.name_scope("layer_2"):
    l2 = add_layer(l1, 10, 10, activation_function=tf.nn.relu)
with tf.name_scope("y_pred"):
    # NOTE(review): the logits are built from l1, so layer_2 (l2) never
    # influences the loss and its variables are never trained. Left as is
    # because feeding l2 here would change the architecture that existing
    # checkpoints were trained with - confirm which one is intended.
    logits = add_layer(l1, 10, num_classes)
    print("logits:", logits)
    # Index of the largest logit in each row = predicted class label.
    predicted_labels = tf.argmax(logits, 1)

with tf.name_scope("loss"):
    # Mean softmax cross-entropy between the one-hot labels and the logits.
    loss = tf.reduce_mean(
        tf.nn.softmax_cross_entropy_with_logits(labels=ys, logits=logits))
    tf.summary.scalar("loss", tensor=loss)

with tf.name_scope("train"):
    train_op = tf.train.GradientDescentOptimizer(learning_rate=0.03).minimize(loss)

# Accuracy: fraction of rows whose predicted label equals the labelled class.
correct_prediction = tf.equal(predicted_labels, tf.argmax(ys, 1))
accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))
tf.summary.scalar("accuracy", tensor=accuracy)
def fit(X, y, n, keep_prob, isTrain):
    """Train the network on ``(X, y)`` or run a saved checkpoint on it.

    Training (``isTrain`` true): initialises all variables, runs ``n``
    gradient steps on the full batch and, every 100 steps, prints loss and
    accuracy, writes the merged summaries to ``nn_huanping_log`` and saves a
    checkpoint under ``model/nn_huanping.model``.

    Inference (``isTrain`` false): restores the latest checkpoint found in
    ``model/``, prints the accuracy on ``(X, y)``, appends one
    ``(nodeid1[i], nodeid2[i], predicted_label[i])`` row per sample to the
    module-level ``result`` list and writes those rows through
    ``writer_csv``.

    Args:
        X: feature rows fed to the ``xs`` placeholder.
        y: one-hot label rows fed to the ``ys`` placeholder.
        n: number of training steps; not used for inference.
        keep_prob: dropout keep probability fed to ``keep_prob_s``.
        isTrain: selects training (true) or inference (false).

    Returns:
        None.
    """
    feed = {ys: y, xs: X, keep_prob_s: keep_prob}

    with tf.Session() as sess:
        if isTrain:
            # Keep at most 15 checkpoint files on disk.
            saver = tf.train.Saver(tf.global_variables(), max_to_keep=15)
            merged = tf.summary.merge_all()
            # TensorBoard event writer.
            writer = tf.summary.FileWriter(logdir="nn_huanping_log", graph=sess.graph)
            try:
                sess.run(tf.global_variables_initializer())
                for step in range(n):
                    _loss, _ = sess.run([loss, train_op], feed_dict=feed)
                    if step % 100 == 0:
                        print("epoch:%d/tloss:%.5f " % (step, _loss))
                        # NOTE: evaluated with the training keep_prob, i.e.
                        # with dropout still active.
                        acc = sess.run(accuracy, feed_dict=feed)
                        print(acc)
                        rs = sess.run(merged, feed_dict=feed)
                        writer.add_summary(summary=rs, global_step=step)
                        saver.save(sess=sess, save_path="model/nn_huanping.model",
                                   global_step=step)
            finally:
                # Flush pending events and release the event file even if a
                # training step raises.
                writer.close()
            return

        ckpt = tf.train.get_checkpoint_state("model/")
        if not (ckpt and ckpt.model_checkpoint_path):
            # Without restored weights there is nothing meaningful to run.
            print("No checkpoint found in model/ - train the model first (isTrain=True).")
            return

        saver = tf.train.Saver()
        saver.restore(sess, ckpt.model_checkpoint_path)
        pred_test, acc = sess.run([predicted_labels, accuracy], feed_dict=feed)
        print(acc)
        print(nodeid1.shape)
        print(nodeid2.shape)

        # NOTE(review): rows are paired purely by position, so nodeid1 and
        # nodeid2 must be in the same row order as X - verify at the caller.
        rows = list(zip(nodeid1, nodeid2, pred_test))
        result.extend(rows)
        print(result)
        # Write only the rows produced by this call; writing the whole
        # accumulated ``result`` would duplicate rows from earlier calls.
        writer_csv.writerows(rows)


# Disabled experiment: predict the labels of 10 randomly sampled test rows.
# sample_indexes = random.sample(range(len(y_test)), 10)
# X_test_min = [X_test[i] for i in sample_indexes]
# y_test_min = [y_test[i] for i in sample_indexes]
# # Run the "predicted_labels" op.
# predicted = sess.run(predicted_labels, feed_dict={xs: X_test_min, keep_prob_s: 0.8})
# print(y_test_min)
# print(predicted)
# Training run (currently disabled): 10000 gradient steps on the training
# split with a dropout keep probability of 0.5.
#fit(X_train, y_train,10000, 0.5, True)
# Inference run: restore the latest checkpoint from model/ and predict on the
# test split with dropout switched off (keep probability 1.0). The step count
# is only read by the training branch of fit().
fit(X_test, y_test_,10000, 1.0, False)
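# Miscellaneous notes
# 1. A tensor object cannot be used as input: a tensor object is no longer an array.
# 2. Weights and biases can be tracked with tf.histogram_summary(layer_name + '/weights', Weights); it is unclear whether their values can also be fetched through session.run().
# 3. Predicted labels: predicted_labels = tf.arg_max(logits, 1) returns the predicted class label values.
# Q: Why does it not work to add a dimension to the arrays and arrange nodeid1, nodeid2 and pred_test as columns of one new array??? (Excuse me?) The attempt is shown below.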
# Scratch experiment belonging to the question in the notes above: give each
# id vector a trailing axis and join the two columns along axis 1.
# NOTE(review): these statements run at module level after the fit(...) call
# and rebind ``nodeid1``, ``nodeid2`` and the ``result`` list.
nodeid1 = nodeid1[:, np.newaxis] # add a dimension
nodeid2 = nodeid2[:, np.newaxis]
result = np.concatenate((nodeid1,nodeid2), axis=1)