Reference and code: Power Allocation in Multi-User Cellular Networks with Deep Q Learning Approach — Fan Meng, Peng Chen, Lenan Wu
A two-step training framework is proposed: 1) a deep Q-network (DQN) is first trained offline with the deep Q-learning (DQL) algorithm in a simulated environment; 2) the DQN is then further fine-tuned during online training with real data.
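The following is a minimal sketch of that two-phase schedule; simulate_slot, measure_slot, the unlabeled gradient step, and the episode counts are purely illustrative stand-ins, not part of the released code:

import numpy as np

def simulate_slot(state, action):   # stand-in for the simulated environment
    return np.random.randn(state.size), float(np.random.rand())   # (next_state, reward)

def measure_slot(state, action):    # stand-in for feedback measured from the real network
    return np.random.randn(state.size), float(np.random.rand())

def dql_train(step_fn, episodes, slots, state_dim=50):
    for _ in range(episodes):
        state = np.zeros(state_dim)
        for _ in range(slots):
            action = np.random.randint(10)      # epsilon-greedy over the discrete power levels in the full code
            next_state, reward = step_fn(state, action)
            # ...store (state, action, reward, next_state) and take a DQN gradient step...
            state = next_state

dql_train(simulate_slot, episodes=1000, slots=50)   # step 1: offline DQL training in the simulator
dql_train(measure_slot, episodes=100, slots=50)     # step 2: online fine-tuning with real data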
Simple network model: in a system of N cells, a base station (BS) at the center of each cell serves K users simultaneously on a shared frequency band. (Here a cellular network with N = 25 cells is used, with one BS per cell serving 4 users at the same time.)
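Below is a minimal numpy sketch of the quantity this setup optimizes, the downlink sum-rate under mutual interference; the channel-gain matrix g and the fixed power vector are illustrative placeholders, not the paper's channel model:

import numpy as np

N, K = 25, 4                                  # cells and users per cell
links = N * K                                 # one BS-user link per scheduled user
noise = 1e-3 * 10 ** (-114. / 10.)            # -114 dBm AWGN power, in watts

g = np.random.rayleigh(size=(links, links)) ** 2     # g[i, j]: gain from link j's transmitter to link i's receiver
p = np.full(links, 1e-3 * 10 ** (38. / 10.))         # every link at the 38 dBm maximum, in watts

signal = np.diag(g) * p                               # desired received power on each link
interference = g @ p - signal                         # all other transmissions on the shared band
rate = np.log2(1. + signal / (interference + noise))  # per-link spectral efficiency (bps/Hz)
print('average rate per link: %.3f bps/Hz' % rate.mean())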
The effect of the discount factor γ is also studied. With γ ∈ {0.0, 0.1, 0.3, 0.7, 0.9}, the average rate C̄ during training is plotted in the original post (figure not reproduced here): over the same training period, the larger discount factors γ ∈ {0.7, 0.9} clearly reach lower values of C̄ than the smaller ones. The trained DQNs are then tested on cellular networks with three different cell counts (N = 25, 49, and 100): the DQN trained with γ = 0.0 achieves the highest C̄, while the DQN with the largest γ achieves the lowest.
Comparison of all five power allocation schemes over 1000 time slots (the curves are smoothed with a moving-average window).
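A small sketch of that moving-average smoothing, assuming a simple convolution filter (the released code uses its own Smooth helper; the window size here is illustrative):

import numpy as np

def moving_average(x, window=11):
    # convolve with a normalized box window; 'valid' keeps only fully covered positions
    return np.convolve(x, np.ones(window) / window, mode='valid')

per_slot_rate = np.random.rand(1000)          # e.g., the per-slot average rate of one scheme
smoothed = moving_average(per_slot_rate)      # this is what gets plotted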
The authors study the PA problem in cellular networks with IMAC and apply data-driven, model-free DQL to solve it. To stay consistent with the PA optimization objective, the current sum-rate is used as the reward function, and future rewards are not included (i.e., the discount factor is set to zero).
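A short sketch (illustrative shapes only) of what this reward design means for the training target: with γ = 0 the DQN regresses directly onto the instantaneous sum-rate reward, which is what dqn_icc.py below does, while dqn_gamma_test.py adds the bootstrapped next-state term:

import numpy as np

batch_size, num_actions = 256, 10
r = np.random.rand(batch_size)                    # instantaneous sum-rate rewards of a sampled batch
q_next = np.random.rand(batch_size, num_actions)  # Q estimates for the corresponding next states

def td_target(r, q_next, gamma):
    return r + gamma * q_next.max(axis=1)

y_gamma0 = td_target(r, q_next, gamma=0.0)   # equals r: the reward-only target used in dqn_icc.py
y_gamma3 = td_target(r, q_next, gamma=0.3)   # target when future rewards are discounted in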
Open-source code:
dqn_icc.py
# -*- coding: utf-8 -*-
"""
Created on Sat Sep 15 11:24:43 2018
Q / gamma = 0
minimum transmit power: 5dBm/ maximum: 38dBm
bandwidth 10MHz
AWGN power -114dBm
path loss 120.9+37.6log10(d) (dB) d: transmitting distance (km)
using interferers' set and therefore reducing the computation complexity
multiple users / single BS
FP algorithm, WMMSE algorithm, maximum power, random power allocation schemes as comparisons
downlink
@author: mengxiaomao / Email: mengxiaomaomao@outlook.com
"""
import os
import time
import scipy.special  # provides k0, used in the Jakes fading model
import random
import numpy as np
from scipy import io
import tensorflow as tf
from collections import deque
from PA_alg import PA_alg
reuse = tf.AUTO_REUSE
dtype = np.float32
flag_fig = True
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2'
fd = 10
Ts = 20e-3
Ns = 5e1 #5e1 1e3
n_x = 5
n_y = 5
L = 2
C = 16
meanM = 2 # lambda: average user number per BS (Poisson mean)
minM = 4 # minimum user number per BS
maxM = 4 # maximum user number per BS
min_dis = 0.01 #km
max_dis = 1. #km
min_p = 5. #dBm
max_p = 38. #dBm
p_n = -114. #dBm
power_num = 10 #action_num
OBSERVE = 100
EPISODE = 1000
TEST_EPISODE = 500
memory_size = 5000
INITIAL_EPSILON = 0.2
FINAL_EPSILON = 0.0001
learning_rate = 0.001
train_interval = 10
batch_size = 256
c = 3*L*(L+1) + 1 # number of adjacent BSs (including the serving BS)
K = maxM * c # maximum number of adjacent users, including the user itself
state_num = 3*C + 2 # state dimension: (C+1) last rates + (C+1) last powers + C interference terms
N = n_x * n_y # BS number
M = N * maxM # maximum users
W_ = np.ones((M), dtype = dtype) #[M]
sigma2_ = 1e-3*pow(10., p_n/10.)
maxP = 1e-3*pow(10., max_p/10.)
power_set = np.hstack([np.zeros((1), dtype=dtype), 1e-3*pow(10., np.linspace(min_p, max_p, power_num-1)/10.)])
replay_memory = deque(maxlen = memory_size)
weight_file = os.path.join(os.getcwd(), 'dqn_00.mat')
hist_file = os.path.join(os.getcwd(), 'train_hist_00.mat') # must point to a writable .mat file, not a directory
def Generate_H_set():
'''
Jakes model
'''
H_set = np.zeros([M,K,int(Ns)], dtype=dtype)
    pho = np.float32(scipy.special.k0(2*np.pi*fd*Ts)) # temporal correlation coefficient; Jakes' model is usually written with the first-kind Bessel function scipy.special.j0
H_set[:,:,0] = np.kron(np.sqrt(0.5*(np.random.randn(M, c)**2+np.random.randn(M, c)**2)), np.ones((1,maxM), dtype=np.int32))
for i in range(1,int(Ns)):
H_set[:,:,i] = H_set[:,:,i-1]*pho + np.sqrt((1.-pho**2)*0.5*(np.random.randn(M, K)**2+np.random.randn(M, K)**2))
path_loss = Generate_path_loss()
H2_set = np.square(H_set) * np.tile(np.expand_dims(path_loss, axis=2), [1,1,int(Ns)])
return H2_set
def Generate_environment():
path_matrix = M*np.ones((n_y + 2*L, n_x + 2*L, maxM), dtype = np.int32)
for i in range(L, n_y+L):
for j in range(L, n_x+L):
for l in range(maxM):
path_matrix[i,j,l] = ((i-L)*n_x + (j-L))*maxM + l
p_array = np.zeros((M, K), dtype = np.int32)
for n in range(N):
i = n//n_x
j = n%n_x
Jx = np.zeros((0), dtype = np.int32)
Jy = np.zeros((0), dtype = np.int32)
for u in range(i-L, i+L+1):
v = 2*L+1-np.abs(u-i)
jx = j - (v-i%2)//2 + np.linspace(0, v-1, num = v, dtype = np.int32) + L
jy = np.ones((v), dtype = np.int32)*u + L
Jx = np.hstack((Jx, jx))
Jy = np.hstack((Jy, jy))
for l in range(maxM):
for k in range(c):
for u in range(maxM):
p_array[n*maxM+l,k*maxM+u] = path_matrix[Jy[k],Jx[k],u]
p_main = p_array[:,(c-1)//2*maxM:(c+1)//2*maxM]
for n in range(N):
for l in range(maxM):
temp = p_main[n*maxM+l,l]
p_main[n*maxM+l,l] = p_main[n*maxM+l,0]
p_main[n*maxM+l,0] = temp
p_inter = np.hstack([p_array[:,:(c-1)//2*maxM], p_array[:,(c+1)//2*maxM:]])
p_array = np.hstack([p_main, p_inter])
user = np.maximum(np.minimum(np.random.poisson(meanM, (N)), maxM), minM)
user_list = np.zeros((N, maxM), dtype = np.int32)
for i in range(N):
user_list[i,:user[i]] = 1
for k in range(N):
for i in range(maxM):
if user_list[k,i] == 0.:
p_array = np.where(p_array == k*maxM+i, M, p_array)
p_list = list()
for i in range(M):
p_list_temp = list()
for j in range(K):
p_list_temp.append([p_array[i,j]])
p_list.append(p_list_temp)
return p_array, p_list, user_list
def Generate_path_loss():
slope = 0. #0.3
p_tx = np.zeros((n_y, n_x))
p_ty = np.zeros((n_y, n_x))
p_rx = np.zeros((n_y, n_x, maxM))
p_ry = np.zeros((n_y, n_x, maxM))
dis_rx = np.random.uniform(min_dis, max_dis, size = (n_y, n_x, maxM))
phi_rx = np.random.uniform(-np.pi, np.pi, size = (n_y, n_x, maxM))
for i in range(n_y):
for j in range(n_x):
p_tx[i,j] = 2*max_dis*j + (i%2)*max_dis
p_ty[i,j] = np.sqrt(3.)*max_dis*i
for k in range(maxM):
p_rx[i,j,k] = p_tx[i,j] + dis_rx[i,j,k]*np.cos(phi_rx[i,j,k])
p_ry[i,j,k] = p_ty[i,j] + dis_rx[i,j,k]*np.sin(phi_rx[i,j,k])
dis = 1e10 * np.ones((M, K), dtype = dtype)
lognormal = np.zeros((M, K), dtype = dtype)
for k in range(N):
for l in range(maxM):
for i in range(c):
for j in range(maxM):
if p_array[k*maxM+l,i*maxM+j] < M:
bs = p_array[k*maxM+l,i*maxM+j]//maxM
dx2 = np.square((p_rx[k//n_x][k%n_x][l]-p_tx[bs//n_x][bs%n_x]))
dy2 = np.square((p_ry[k//n_x][k%n_x][l]-p_ty[bs//n_x][bs%n_x]))
distance = np.sqrt(dx2 + dy2)
dis[k*maxM+l,i*maxM+j] = distance
std = 8. + slope * (distance - min_dis)
lognormal[k*maxM+l,i*maxM+j] = np.random.lognormal(sigma = std)
path_loss = lognormal*pow(10., -(120.9 + 37.6*np.log10(dis))/10.)
return path_loss
def Calculate_rate():
maxC = 1000.
P_extend = tf.concat([P, tf.zeros((1), dtype=dtype)], axis=0)
P_matrix = tf.gather_nd(P_extend, p_list)
path_main = tf.multiply(H2[:,0], P_matrix[:,0])
path_inter = tf.reduce_sum(tf.multiply(H2[:,1:], P_matrix[:,1:]), axis=1)
sinr = path_main / (path_inter + sigma2)
sinr = tf.minimum(sinr, maxC) #capped sinr
rate = W * tf.log(1. + sinr)/np.log(2)
rate_extend = tf.concat([rate, tf.zeros((1), dtype=dtype)], axis=0)
rate_matrix = tf.gather_nd(rate_extend, p_list)
sinr_norm_inv = H2[:,1:] / tf.tile(H2[:,0:1], [1,K-1])
sinr_norm_inv = tf.log(1. + sinr_norm_inv)/np.log(2)# log representation
reward = tf.reduce_sum(rate)
return rate_matrix, sinr_norm_inv, P_matrix, reward
def Generate_state(rate_last, p_last, sinr_norm_inv):
'''
Generate state matrix
ranking
state including:
1.rate[t-1] [M,K] rate_last
2.power[t-1] [M,K] p_last
3.sinr_norm_inv[t] [M,K-1] sinr_norm_inv
'''
# s_t = np.hstack([rate_last])
# s_t = np.hstack([rate_last, sinr_norm_inv])
indices1 = np.tile(np.expand_dims(np.linspace(0, M-1, num=M, dtype=np.int32), axis=1),[1,C])
indices2 = np.argsort(sinr_norm_inv, axis = 1)[:,-C:]
rate_last = np.hstack([rate_last[:,0:1], rate_last[indices1, indices2+1]])
p_last = np.hstack([p_last[:,0:1], p_last[indices1, indices2+1]])
sinr_norm_inv = sinr_norm_inv[indices1, indices2]
s_t = np.hstack([rate_last, p_last, sinr_norm_inv])
return s_t
def Variable(shape):
w = tf.get_variable('w', shape=shape, initializer = tf.truncated_normal_initializer(stddev=0.1))
b = tf.get_variable('b', shape=[shape[-1]], initializer = tf.constant_initializer(0.01))
return w, b
def Find_params(para_name):
sets=[]
for var in tf.trainable_variables():
if not var.name.find(para_name):
sets.append(var)
return sets
#def Network(s, a, name):
# with tf.variable_scope(name + '.0', reuse = reuse):
# w,b = Variable([state_num, 64])
# l = tf.nn.relu(tf.matmul(s, w)+b)
# with tf.variable_scope(name + '.2', reuse = reuse):
# w,b = Variable([64, power_num])
# q_hat = tf.matmul(l, w) + b
# r = tf.reduce_sum(tf.multiply(q_hat, a), reduction_indices = 1)
# a_hat = tf.argmax(q_hat, 1)
# list_var = Find_params(name)
# return q_hat, a_hat, r, list_var
def Network(s, a, name):
with tf.variable_scope(name + '.0', reuse = reuse):
w,b = Variable([state_num, 128])
l = tf.nn.relu(tf.matmul(s, w)+b)
with tf.variable_scope(name + '.1', reuse = reuse):
w,b = Variable([128, 64])
l = tf.nn.relu(tf.matmul(l, w)+b)
with tf.variable_scope(name + '.2', reuse = reuse):
w,b = Variable([64, power_num])
q_hat = tf.matmul(l, w) + b
r = tf.reduce_sum(tf.multiply(q_hat, a), reduction_indices = 1)
a_hat = tf.argmax(q_hat, 1)
list_var = Find_params(name)
return q_hat, a_hat, r, list_var
def Loss(y, r):
cost = tf.reduce_mean(tf.square((y - r)))
return cost
def Optimizer(cost, var_list):
global_step = tf.Variable(0, trainable=False)
lr = tf.train.exponential_decay(learning_rate, global_step = global_step,
decay_steps = EPISODE, decay_rate = 0.1)
add_global = global_step.assign_add(1)
with tf.variable_scope('opt', reuse = reuse):
train_op = tf.train.AdamOptimizer(learning_rate = lr).minimize(cost, var_list = var_list)
return train_op, add_global
def Save_store(s_t, a_t, r_t, s_next):
r_t = np.tile(r_t, (M))
p_t = np.zeros((M, power_num), dtype = dtype)
p_t[range(M), a_t] = 1.
for i in range(M):
replay_memory.append((s_t[i], p_t[i], r_t[i], s_next[i]))
def Sample():
minibatch = random.sample(replay_memory, batch_size)
batch_s = [d[0] for d in minibatch]
batch_a = [d[1] for d in minibatch]
batch_r = [d[2] for d in minibatch]
return batch_s, batch_a, batch_r
def Select_action(sess, s_t, episode):
    # epsilon-greedy exploration: constant epsilon during the observation phase, then linear annealing
    if episode > OBSERVE:
        epsilon = INITIAL_EPSILON - (episode - OBSERVE) * (INITIAL_EPSILON - FINAL_EPSILON) / (EPISODE - OBSERVE)
    else:
        epsilon = INITIAL_EPSILON
q_hat_ = sess.run(q_main, feed_dict={s: s_t}) #[M, power_num]
best_action = np.argmax(q_hat_, axis = 1)
random_index = np.array(np.random.uniform(size = (M)) < epsilon, dtype = np.int32)
random_action = np.random.randint(0, high = power_num, size = (M))
action_set = np.vstack([best_action, random_action])
power_index = action_set[random_index, range(M)] #[M]
power = power_set[power_index] # W
return power, power_index
def Step(p_t, H2_t):
rate_last, sinr_norm_, p_last, reward_ = sess.run([rate_matrix, sinr_norm_inv, P_matrix, reward],
feed_dict={P: p_t, H2: H2_t, W: W_, sigma2: sigma2_})
s_next = Generate_state(rate_last, p_last, sinr_norm_)
return s_next, reward_
def Experience_replay(sess):
batch_s, batch_a, batch_r = Sample()
sess.run(train_main, feed_dict = {s : batch_s, a : batch_a, y : batch_r})
def Save(weight_file):
dict_name={}
for var in list_main:
dict_name[var.name]=var.eval()
io.savemat(weight_file, dict_name)
def Network_ini(theta):
update=[]
for var in list_main:
update.append(tf.assign(tf.get_default_graph().get_tensor_by_name(var.name),tf.constant(np.reshape(theta[var.name],var.shape))))
return update
def Initial_para():
H2_set = Generate_H_set()
s_next, _ = Step(np.zeros([M], dtype = dtype), H2_set[:,:,0])
return H2_set, s_next
def Plot_environment():
if flag_fig:
import matplotlib.pyplot as plt
plt.close('all')
p_tx = np.zeros((n_y, n_x))
p_ty = np.zeros((n_y, n_x))
p_rx = np.zeros((n_y, n_x, maxM))
p_ry = np.zeros((n_y, n_x, maxM))
dis_r = np.random.uniform(min_dis, max_dis, size = (n_y, n_x, maxM))
phi_r = np.random.uniform(-np.pi, np.pi, size = (n_y, n_x, maxM))
for i in range(n_y):
for j in range(n_x):
p_tx[i,j] = 2*max_dis*j + (i%2)*max_dis
p_ty[i,j] = np.sqrt(3.)*max_dis*i
for k in range(maxM):
p_rx[i,j,k] = p_tx[i,j] + dis_r[i,j,k]*np.cos(phi_r[i,j,k])
p_ry[i,j,k] = p_ty[i,j] + dis_r[i,j,k]*np.sin(phi_r[i,j,k])
plt.close('all')
plt.figure(1)
for j in range(n_x):
for i in range(n_y):
for k in range(6):
x_t = [p_tx[i,j]+2/np.sqrt(3.)*max_dis*np.sin(np.pi/3*k), p_tx[i,j]+2/np.sqrt(3.)*max_dis*np.sin(np.pi/3*(k+1))]
y_t = [p_ty[i,j]+2/np.sqrt(3.)*max_dis*np.cos(np.pi/3*k), p_ty[i,j]+2/np.sqrt(3.)*max_dis*np.cos(np.pi/3*(k+1))]
plt.plot(x_t, y_t, color="black")
for i in range(n_y):
for j in range(n_x):
for l in range(maxM):
if user_list[i*n_x+j,l]:
rx = plt.scatter(p_rx[i,j,l], p_ry[i,j,l], marker='o', label='2', s=25, color='orange')
plt.text(p_rx[i,j,l]+0.03, p_ry[i,j,l]+0.03, '%d' %((i*n_x+j)*maxM+l), ha ='center', va = 'bottom', fontsize=12)
tx = plt.scatter(p_tx, p_ty, marker='x', label='1', s=45)
for i in range(n_y):
for j in range(n_x):
plt.text(p_tx[i,j]+0.1, p_ty[i,j]+0.1, '%d' %(i*n_x+j), ha ='center', va = 'bottom', fontsize=15, color = 'r')
plt.legend([tx, rx], ["BS", "User"])
plt.xlabel('X axis (km)')
plt.ylabel('Y axis (km)')
plt.axis('equal')
plt.show()
#
# if train_hist:
# window = 101
# train_hist = Smooth(np.array(train_hist), window)
# plt.figure(2)
# plt.plot(range(EPISODE), train_hist)
# plt.show()
def Train_episode(sess, episode):
reward_dqn_list = list()
H2_set, s_t = Initial_para()
for step_index in range(int(Ns)):
p_t, a_t = Select_action(sess, s_t, episode)
s_next, r_ = Step(p_t, H2_set[:,:,step_index])
Save_store(s_t, a_t, r_, s_next)
if episode > OBSERVE:
if step_index % train_interval == 0:
Experience_replay(sess)
s_t = s_next
reward_dqn_list.append(r_)
if episode > OBSERVE:
sess.run(add_global)
dqn_mean = sum(reward_dqn_list)/(Ns*M) # bps/Hz per link
return dqn_mean
def Test_episode(sess, episode):
'''
1.DQN
2.FP
3.WMMSE
4.Maximum Power
5.Random Power
'''
reward_dqn_list = list()
reward_fp_list = list()
reward_wmmse_list = list()
reward_mp_list = list()
reward_rp_list = list()
H2_set, s_t = Initial_para()
for step_index in range(int(Ns)):
        q_hat_ = sess.run(q_main, feed_dict={s: s_t}) #[M, power_num]
p_t = power_set[np.argmax(q_hat_, axis = 1)] # W
s_next, r_ = Step(p_t, H2_set[:,:,step_index])
s_t = s_next
reward_dqn_list.append(r_)
pa_alg_set.Load_data(H2_set[:,:,step_index], p_array)
fp_alg, wmmse_alg, mp_alg, rp_alg = pa_alg_set.Calculate()
r_fp = sess.run(reward, feed_dict={P: fp_alg, H2: H2_set[:,:,step_index], W: W_, sigma2: sigma2_})
r_wmmse = sess.run(reward, feed_dict={P: wmmse_alg, H2: H2_set[:,:,step_index], W: W_, sigma2: sigma2_})
r_mp = sess.run(reward, feed_dict={P: mp_alg, H2: H2_set[:,:,step_index], W: W_, sigma2: sigma2_})
r_rp = sess.run(reward, feed_dict={P: rp_alg, H2: H2_set[:,:,step_index], W: W_, sigma2: sigma2_})
reward_fp_list.append(r_fp)
reward_wmmse_list.append(r_wmmse)
reward_mp_list.append(r_mp)
reward_rp_list.append(r_rp)
dqn_mean = sum(reward_dqn_list)/(Ns*M)
fp_mean = sum(reward_fp_list)/(Ns*M)
wmmse_mean = sum(reward_wmmse_list)/(Ns*M)
mp_mean = sum(reward_mp_list)/(Ns*M)
rp_mean = sum(reward_rp_list)/(Ns*M)
return dqn_mean, fp_mean, wmmse_mean, mp_mean, rp_mean
def Train(sess):
st = time.time()
dqn_hist = list()
for k in range(1, EPISODE+1):
dqn_hist.append(Train_episode(sess, k))
if k % 100 == 0:
print("Episode(train):%d DQN: %.3f Time cost: %.2fs"
%(k, np.mean(dqn_hist[-100:]), time.time()-st))
st = time.time()
Save(weight_file)
return dqn_hist
def Test(sess):
sess.run(load)
dqn_hist = list()
fp_hist = list()
wmmse_hist = list()
mp_hist = list()
rp_hist = list()
for k in range(1, TEST_EPISODE+1):
dqn_mean, fp_mean, wmmse_mean, mp_mean, rp_mean = Test_episode(sess, k)
dqn_hist.append(dqn_mean)
fp_hist.append(fp_mean)
wmmse_hist.append(wmmse_mean)
mp_hist.append(mp_mean)
rp_hist.append(rp_mean)
print("Test: DQN: %.3f FP: %.3f WMMSE: %.3f MP: %.3f RP: %.3f"
%(np.mean(dqn_hist), np.mean(fp_hist), np.mean(wmmse_hist), np.mean(mp_hist), np.mean(rp_hist)))
print("%.2f, %.2f, %.2f, %.2f, %.2f"
%(np.mean(dqn_hist), np.mean(fp_hist), np.mean(wmmse_hist), np.mean(mp_hist), np.mean(rp_hist)))
return dqn_hist, fp_hist, wmmse_hist, mp_hist, rp_hist
def Test_one(sess):
sess.run(load)
reward_dqn_list = list()
reward_fp_list = list()
reward_wmmse_list = list()
reward_mp_list = list()
reward_rp_list = list()
H2_set, s_t = Initial_para()
for step_index in range(int(Ns)):
        q_hat_ = sess.run(q_main, feed_dict={s: s_t}) #[M, power_num]
p_t = power_set[np.argmax(q_hat_, axis = 1)] # W
s_next, r_ = Step(p_t, H2_set[:,:,step_index])
s_t = s_next
reward_dqn_list.append(r_/M)
pa_alg_set.Load_data(H2_set[:,:,step_index], p_array)
fp_alg, wmmse_alg, mp_alg, rp_alg = pa_alg_set.Calculate()
r_fp = sess.run(reward, feed_dict={P: fp_alg, H2: H2_set[:,:,step_index], W: W_, sigma2: sigma2_})
r_wmmse = sess.run(reward, feed_dict={P: wmmse_alg, H2: H2_set[:,:,step_index], W: W_, sigma2: sigma2_})
r_mp = sess.run(reward, feed_dict={P: mp_alg, H2: H2_set[:,:,step_index], W: W_, sigma2: sigma2_})
r_rp = sess.run(reward, feed_dict={P: rp_alg, H2: H2_set[:,:,step_index], W: W_, sigma2: sigma2_})
reward_fp_list.append(r_fp/M)
reward_wmmse_list.append(r_wmmse/M)
reward_mp_list.append(r_mp/M)
reward_rp_list.append(r_rp/M)
if flag_fig:
import matplotlib.pyplot as plt
plt.figure(3)
window = 11
y = list()
y.append(Smooth(np.array(reward_dqn_list), window))
y.append(Smooth(np.array(reward_fp_list), window))
y.append(Smooth(np.array(reward_wmmse_list), window))
y.append(Smooth(np.array(reward_mp_list), window))
y.append(Smooth(np.array(reward_rp_list), window))
        label=['DQN','FP','WMMSE','Maximal power','Random power']
color = ['royalblue', 'orangered', 'lawngreen', 'gold', 'olive']
linestyle = ['-', '--', '-.', ':', '--']
p = list()
for k in range(5):
p_temp, = plt.plot(range(int(Ns)), y[k], color = color[k], linestyle = linestyle[k], label = label[k])
p.append(p_temp)
plt.legend(loc = 7)
plt.xlabel('Time slot')
        plt.ylabel('Average rate (bps/Hz)')
plt.grid()
plt.show()
print("Test: DQN: %.2f FP: %.2f WMMSE: %.2f MP: %.2f RP: %.2f"
%(np.mean(reward_dqn_list), np.mean(reward_fp_list), np.mean(reward_wmmse_list), np.mean(reward_mp_list), np.mean(reward_rp_list)))
def Bar_plot():
import matplotlib.pyplot as plt
plt.figure(4)
data = [[3.19, 2.97, 2.91, 2.00, 1.99],
[2.22, 1.97, 2.01, 1.02, 1.02],
[1.53, 1.33, 1.38, 0.51, 0.51],
[1.22, 1.05, 1.10, 0.35, 0.35]]
data = np.array(data).T
name_list = ['K=1','K=2','K=4','K=6']
num = len(data[0])
set_size = len(data)
total_width = 3
width = total_width / num
label=['DQN','FP','WMMSE','Random power','Maximal power']
color = ['royalblue', 'orangered', 'lawngreen', 'gold', 'olive']
x = np.linspace(0, 16, num = num, dtype = dtype)
plt.xticks(x+total_width/2, name_list)
bar = list()
for k in range(set_size):
bar.append(plt.bar(x, data[k], width = width, label = label[k],fc = color[k]))
x = x + width
for k in range(set_size):
for n in range(num):
height = bar[k][n].get_height()
if k == 4:
plt.text(bar[k][n].get_x() + bar[k][n].get_width()/2, height+0.1, data[k][n], ha='center', va='bottom')
else:
plt.text(bar[k][n].get_x() + bar[k][n].get_width()/2, height, data[k][n], ha='center', va='bottom')
plt.legend(loc = 1)
plt.xlabel('User number per cell')
    plt.ylabel('Average rate (bps/Hz)')
plt.show()
def Smooth(a, window):
out0 = np.convolve(a, np.ones(window, dtype=np.int32),'valid')/window
r = np.arange(1, window-1, 2)
start = np.cumsum(a[:window-1])[::2]/r
stop = (np.cumsum(a[:-window:-1])[::2]/r)[::-1]
return np.concatenate((start, out0, stop))
if __name__ == "__main__":
with tf.Graph().as_default():
H2 = tf.placeholder(shape = [None, K], dtype = dtype)
P = tf.placeholder(shape = [None], dtype = dtype)
W = tf.placeholder(shape = [None], dtype = dtype)
sigma2 = tf.placeholder(dtype = dtype)
p_array, p_list, user_list = Generate_environment()
rate_matrix, sinr_norm_inv, P_matrix, reward = Calculate_rate()
s = tf.placeholder(shape = [None, state_num], dtype = dtype)
a = tf.placeholder(shape = [None, power_num], dtype = dtype)
y = tf.placeholder(shape = [None], dtype = dtype)
q_main, a_main, r, list_main = Network(s, a, 'main')
cost = Loss(y, r)
train_main, add_global = Optimizer(cost, list_main)
with tf.Session() as sess:
tf.global_variables_initializer().run()
train_hist = Train(sess)
io.savemat(hist_file, {'train_hist':train_hist})
Plot_environment()
Bar_plot()
pa_alg_set = PA_alg(M, K, maxP)
load = Network_ini(io.loadmat(weight_file))
with tf.Session() as sess:
tf.global_variables_initializer().run()
Test_one(sess)
# dqn_hist, fp_hist, wmmse_hist, mp_hist, rp_hist = Test(sess)
dqn_gamma_test.py
# -*- coding: utf-8 -*-
"""
Created on Sun Oct 7 14:36:08 2018
@author: mengxiaomao / Email: mengxiaomaomao@outlook.com
"""
import os
import time
import scipy.special  # provides k0, used in the Jakes fading model
import random
import numpy as np
from scipy import io
import tensorflow as tf
from collections import deque
reuse = tf.AUTO_REUSE
dtype = np.float32
flag_fig = True
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2'
fd = 10
Ts = 20e-3
L = 2
C = 16
meanM = 2 # lambda: average user number per BS (Poisson mean)
minM = 4 # minimum user number per BS
maxM = 4 # maximum user number per BS
min_dis = 0.01 #km
max_dis = 1. #km
min_p = 5. #dBm
max_p = 38. #dBm
p_n = -114. #dBm
power_num = 10 #action_num
OBSERVE = 100
EPISODE = 1000
memory_size = 5000
INITIAL_EPSILON = 0.2
FINAL_EPSILON = 0.0001
learning_rate = 0.001
train_interval = 10
batch_size = 256
update_rate = 0.01
c = 3*L*(L+1) + 1 # number of adjacent BSs (including the serving BS)
K = maxM * c # maximum number of adjacent users, including the user itself
state_num = 3*C + 2 # state dimension: (C+1) last rates + (C+1) last powers + C interference terms
sigma2_ = 1e-3*pow(10., p_n/10.)
maxP = 1e-3*pow(10., max_p/10.)
power_set = np.hstack([np.zeros((1), dtype=dtype), 1e-3*pow(10., np.linspace(min_p, max_p, power_num-1)/10.)])
gamma = 0.3
weight_file = os.path.join(os.getcwd(), 'saved_networks', 'dqn_01.mat')
hist_file = os.path.join(os.getcwd(), 'saved_networks', 'hist_01') # Plot_gamma below reads the history files from saved_networks/
replay_memory = deque(maxlen = memory_size)
def Generate_H_set():
'''
Jakes model
'''
    pho = np.float32(scipy.special.k0(2*np.pi*fd*Ts)) # temporal correlation coefficient; Jakes' model is usually written with the first-kind Bessel function scipy.special.j0
H_set = np.zeros([M,K,int(Ns)], dtype=dtype)
H_set[:,:,0] = np.kron(np.sqrt(0.5*(np.random.randn(M, c)**2+np.random.randn(M, c)**2)), np.ones((1,maxM), dtype=np.int32))
for i in range(1,int(Ns)):
H_set[:,:,i] = H_set[:,:,i-1]*pho + np.sqrt((1.-pho**2)*0.5*(np.random.randn(M, K)**2+np.random.randn(M, K)**2))
path_loss = Generate_path_loss()
H2_set = np.square(H_set) * np.tile(np.expand_dims(path_loss, axis=2), [1,1,int(Ns)])
return H2_set
def Generate_environment():
path_matrix = M*np.ones((n_y + 2*L, n_x + 2*L, maxM), dtype = np.int32)
for i in range(L, n_y+L):
for j in range(L, n_x+L):
for l in range(maxM):
path_matrix[i,j,l] = ((i-L)*n_x + (j-L))*maxM + l
p_array = np.zeros((M, K), dtype = np.int32)
for n in range(N):
i = n//n_x
j = n%n_x
Jx = np.zeros((0), dtype = np.int32)
Jy = np.zeros((0), dtype = np.int32)
for u in range(i-L, i+L+1):
v = 2*L+1-np.abs(u-i)
jx = j - (v-i%2)//2 + np.linspace(0, v-1, num = v, dtype = np.int32) + L
jy = np.ones((v), dtype = np.int32)*u + L
Jx = np.hstack((Jx, jx))
Jy = np.hstack((Jy, jy))
for l in range(maxM):
for k in range(c):
for u in range(maxM):
p_array[n*maxM+l,k*maxM+u] = path_matrix[Jy[k],Jx[k],u]
p_main = p_array[:,(c-1)//2*maxM:(c+1)//2*maxM]
for n in range(N):
for l in range(maxM):
temp = p_main[n*maxM+l,l]
p_main[n*maxM+l,l] = p_main[n*maxM+l,0]
p_main[n*maxM+l,0] = temp
p_inter = np.hstack([p_array[:,:(c-1)//2*maxM], p_array[:,(c+1)//2*maxM:]])
p_array = np.hstack([p_main, p_inter])
user = np.maximum(np.minimum(np.random.poisson(meanM, (N)), maxM), minM)
user_list = np.zeros((N, maxM), dtype = np.int32)
for i in range(N):
user_list[i,:user[i]] = 1
for k in range(N):
for i in range(maxM):
if user_list[k,i] == 0.:
p_array = np.where(p_array == k*maxM+i, M, p_array)
p_list = list()
for i in range(M):
p_list_temp = list()
for j in range(K):
p_list_temp.append([p_array[i,j]])
p_list.append(p_list_temp)
return p_array, p_list, user_list
def Generate_path_loss():
slope = 0. #0.3
p_tx = np.zeros((n_y, n_x))
p_ty = np.zeros((n_y, n_x))
p_rx = np.zeros((n_y, n_x, maxM))
p_ry = np.zeros((n_y, n_x, maxM))
dis_rx = np.random.uniform(min_dis, max_dis, size = (n_y, n_x, maxM))
phi_rx = np.random.uniform(-np.pi, np.pi, size = (n_y, n_x, maxM))
for i in range(n_y):
for j in range(n_x):
p_tx[i,j] = 2*max_dis*j + (i%2)*max_dis
p_ty[i,j] = np.sqrt(3.)*max_dis*i
for k in range(maxM):
p_rx[i,j,k] = p_tx[i,j] + dis_rx[i,j,k]*np.cos(phi_rx[i,j,k])
p_ry[i,j,k] = p_ty[i,j] + dis_rx[i,j,k]*np.sin(phi_rx[i,j,k])
dis = 1e10 * np.ones((M, K), dtype = dtype)
lognormal = np.zeros((M, K), dtype = dtype)
for k in range(N):
for l in range(maxM):
for i in range(c):
for j in range(maxM):
if p_array[k*maxM+l,i*maxM+j] < M:
bs = p_array[k*maxM+l,i*maxM+j]//maxM
dx2 = np.square((p_rx[k//n_x][k%n_x][l]-p_tx[bs//n_x][bs%n_x]))
dy2 = np.square((p_ry[k//n_x][k%n_x][l]-p_ty[bs//n_x][bs%n_x]))
distance = np.sqrt(dx2 + dy2)
dis[k*maxM+l,i*maxM+j] = distance
std = 8. + slope * (distance - min_dis)
lognormal[k*maxM+l,i*maxM+j] = np.random.lognormal(sigma = std)
path_loss = lognormal*pow(10., -(120.9 + 37.6*np.log10(dis))/10.)
return path_loss
def Calculate_rate():
maxC = 1000.
P_extend = tf.concat([P, tf.zeros((1), dtype=dtype)], axis=0)
P_matrix = tf.gather_nd(P_extend, p_list)
path_main = tf.multiply(H2[:,0], P_matrix[:,0])
path_inter = tf.reduce_sum(tf.multiply(H2[:,1:], P_matrix[:,1:]), axis=1)
sinr = path_main / (path_inter + sigma2)
sinr = tf.minimum(sinr, maxC) #capped sinr
rate = W * tf.log(1. + sinr)/np.log(2)
rate_extend = tf.concat([rate, tf.zeros((1), dtype=dtype)], axis=0)
rate_matrix = tf.gather_nd(rate_extend, p_list)
sinr_norm_inv = H2[:,1:] / tf.tile(H2[:,0:1], [1,K-1])
sinr_norm_inv = tf.log(1. + sinr_norm_inv)/np.log(2)# log representation
reward = tf.reduce_sum(rate)
return rate_matrix, sinr_norm_inv, P_matrix, reward
def Generate_state(rate_last, p_last, sinr_norm_inv):
'''
Generate state matrix
ranking
state including:
1.rate[t-1] [M,K] rate_last
2.power[t-1] [M,K] p_last
3.sinr_norm_inv[t] [M,K-1] sinr_norm_inv
'''
indices1 = np.tile(np.expand_dims(np.linspace(0, M-1, num=M, dtype=np.int32), axis=1),[1,C])
indices2 = np.argsort(sinr_norm_inv, axis = 1)[:,-C:]
rate_last = np.hstack([rate_last[:,0:1], rate_last[indices1, indices2+1]])
p_last = np.hstack([p_last[:,0:1], p_last[indices1, indices2+1]])
sinr_norm_inv = sinr_norm_inv[indices1, indices2]
s_t = np.hstack([rate_last, p_last, sinr_norm_inv])
return s_t
def Variable(shape):
w = tf.get_variable('w', shape=shape, initializer = tf.truncated_normal_initializer(stddev=0.1))
b = tf.get_variable('b', shape=[shape[-1]], initializer = tf.constant_initializer(0.01))
return w, b
def Find_params(para_name):
sets=[]
for var in tf.trainable_variables():
if not var.name.find(para_name):
sets.append(var)
return sets
def Network(s, a, name):
with tf.variable_scope(name + '.0', reuse = reuse):
w,b = Variable([state_num, 128])
l = tf.nn.relu(tf.matmul(s, w)+b)
with tf.variable_scope(name + '.1', reuse = reuse):
w,b = Variable([128, 64])
l = tf.nn.relu(tf.matmul(l, w)+b)
with tf.variable_scope(name + '.2', reuse = reuse):
w,b = Variable([64, power_num])
q_hat = tf.matmul(l, w) + b
r = tf.reduce_sum(tf.multiply(q_hat, a), reduction_indices = 1)
a_hat = tf.argmax(q_hat, 1)
list_var = Find_params(name)
return q_hat, a_hat, r, list_var
def Loss(y, r):
cost = tf.reduce_mean(tf.square((y - r)))
return cost
def Optimizer(cost, var_list):
global_step = tf.Variable(0, trainable=False)
lr = tf.train.exponential_decay(learning_rate, global_step = global_step,
decay_steps = EPISODE, decay_rate = 0.1)
add_global = global_step.assign_add(1)
with tf.variable_scope('opt', reuse = reuse):
train_op = tf.train.AdamOptimizer(learning_rate = lr).minimize(cost, var_list = var_list)
return train_op, add_global
def Save_store(s_t, a_t, r_t, s_next):
r_t = np.tile(r_t, (M))
p_t = np.zeros((M, power_num), dtype = dtype)
p_t[range(M), a_t] = 1.
for i in range(M):
replay_memory.append((s_t[i], p_t[i], r_t[i], s_next[i]))
def Sample():
minibatch = random.sample(replay_memory, batch_size)
batch_s = [d[0] for d in minibatch]
batch_a = [d[1] for d in minibatch]
batch_r = [d[2] for d in minibatch]
batch_s_next = [d[3] for d in minibatch]
return batch_s, batch_a, batch_r, batch_s_next
def Select_action(sess, s_t, episode):
    # epsilon-greedy exploration: constant epsilon during the observation phase, then linear annealing
    if episode > OBSERVE:
        epsilon = INITIAL_EPSILON - (episode - OBSERVE) * (INITIAL_EPSILON - FINAL_EPSILON) / (EPISODE - OBSERVE)
    else:
        epsilon = INITIAL_EPSILON
q_hat_ = sess.run(q_main, feed_dict={s: s_t})
best_action = np.argmax(q_hat_, axis = 1)
random_index = np.array(np.random.uniform(size = (M)) < epsilon, dtype = np.int32)
random_action = np.random.randint(0, high = power_num, size = (M))
action_set = np.vstack([best_action, random_action])
power_index = action_set[random_index, range(M)]
power = power_set[power_index] # W
return power, power_index
def Step(p_t, H2_t):
rate_last, sinr_norm_, p_last, reward_ = sess.run([rate_matrix, sinr_norm_inv, P_matrix, reward],
feed_dict={P: p_t, H2: H2_t, W: W_, sigma2: sigma2_})
s_next = Generate_state(rate_last, p_last, sinr_norm_)
return s_next, reward_
def Experience_replay(sess):
    batch_s, batch_a, batch_r, batch_s_next = Sample()
    # bootstrapped target: the next-state value is taken from the online network q_main;
    # the soft-updated target network q_targ is maintained but not used in this target
    a_main_, q_main_ = sess.run([a_main, q_main], feed_dict = {s : batch_s_next})
    q_double = q_main_[range(batch_size), a_main_]
    y_ = batch_r + gamma * q_double
    sess.run(train_main, feed_dict = {s : batch_s, a : batch_a, y : y_})
    sess.run(update)
def Network_update(list_main, list_targ):
update=[]
for i in range(len(list_targ)):
value = list_main[i].value() * update_rate + (1. - update_rate) * list_targ[i].value()
update.append(tf.assign(tf.get_default_graph().get_tensor_by_name(list_targ[i].name), value))
return update
def Initial_para():
H2_set = Generate_H_set()
s_next, _ = Step(np.zeros([M], dtype = dtype), H2_set[:,:,0])
return H2_set, s_next
def Train_episode(sess, episode):
reward_dqn_list = list()
H2_set, s_t = Initial_para()
for step_index in range(int(Ns)):
p_t, a_t = Select_action(sess, s_t, episode)
s_next, r_ = Step(p_t, H2_set[:,:,step_index])
Save_store(s_t, a_t, r_, s_next)
if episode > OBSERVE:
if step_index % train_interval == 0:
Experience_replay(sess)
s_t = s_next
reward_dqn_list.append(r_)
if episode > OBSERVE:
sess.run(add_global)
dqn_mean = sum(reward_dqn_list)/(Ns*M) # bps/Hz per link
return dqn_mean
def Test_episode(sess, episode):
reward_dqn_list = list()
H2_set, s_t = Initial_para()
for step_index in range(int(Ns)):
q_hat_ = sess.run(q_main, feed_dict={s: s_t})
p_t = power_set[np.argmax(q_hat_, axis = 1)] # W
s_next, r_ = Step(p_t, H2_set[:,:,step_index])
s_t = s_next
reward_dqn_list.append(r_)
dqn_mean = sum(reward_dqn_list)/(Ns*M)
return dqn_mean
def Train(sess):
st = time.time()
dqn_hist = list()
for k in range(1, EPISODE+1):
dqn_hist.append(Train_episode(sess, k))
if k % 100 == 0:
print("Episode(train):%d DQN: %.3f Time cost: %.2fs"
%(k, np.mean(dqn_hist[-100:]), time.time()-st))
st = time.time()
Save(weight_file)
return dqn_hist
def Plot_gamma():
if flag_fig:
import matplotlib.pyplot as plt
plt.figure(6)
window = 201
        hist = [os.path.join(os.getcwd(), 'saved_networks', 'hist_00'),
                os.path.join(os.getcwd(), 'saved_networks', 'hist_01'),
                os.path.join(os.getcwd(), 'saved_networks', 'hist_03'),
                os.path.join(os.getcwd(), 'saved_networks', 'hist_07'),
                os.path.join(os.getcwd(), 'saved_networks', 'hist_09')]
y = list()
for k in range(5):
hist_data = io.loadmat(hist[k])['train_hist'][0]
y.append(Smooth(np.array(hist_data), window))
label=[r'$\gamma = 0.0$',r'$\gamma = 0.1$',r'$\gamma = 0.3$',r'$\gamma = 0.7$',r'$\gamma = 0.9$']
color = ['royalblue', 'orangered', 'lawngreen', 'gold', 'olive']
linestyle = ['-', '--', '-.', ':', '--']
p = list()
for k in range(5):
p_temp, = plt.plot(range(EPISODE), y[k], color = color[k], linestyle = linestyle[k], label = label[k])
p.append(p_temp)
plt.legend(loc = 4)
plt.xlabel('Training Epoch')
        plt.ylabel('Average rate (bps/Hz)')
plt.grid()
plt.show()
def Smooth(a, window):
a = np.concatenate((np.ones(window//2)*a[0], a, np.ones(window//2)*a[-1]))
out0 = np.convolve(a, np.ones(window, dtype=np.int32),'valid')/window
return out0
def Test(sess):
sess.run(load)
dqn_hist = list()
for k in range(1, TEST_EPISODE+1):
dqn_list = Test_episode(sess, k)
dqn_hist.append(np.mean(dqn_list))
return dqn_hist
def Save(weight_file):
dict_name={}
for var in list_main:
dict_name[var.name]=var.eval()
io.savemat(weight_file, dict_name)
def Network_ini(theta):
update=[]
for var in list_main:
update.append(tf.assign(tf.get_default_graph().get_tensor_by_name(var.name),tf.constant(np.reshape(theta[var.name],var.shape))))
return update
def Bar_plot_gamma(dqn_hist):
import matplotlib.pyplot as plt
plt.figure(4)
data = np.array(dqn_hist)
data = np.array(data).T
name_list = ['N=25','N=49','N=100']
num = len(data[0])
set_size = len(data)
total_width = 4.0
width = total_width / num
label=[r'$\gamma = 0.0$',r'$\gamma = 0.1$',r'$\gamma = 0.3$',r'$\gamma = 0.7$',r'$\gamma = 0.9$']
color = ['royalblue', 'orangered', 'lawngreen', 'gold', 'olive']
x = np.linspace(0, 16, num = num, dtype = dtype)
plt.xticks(x+total_width/2+0.5, name_list)
bar = list()
for k in range(set_size):
bar.append(plt.bar(x, data[k], width = width, label = label[k],fc = color[k]))
x = x + width
for k in range(set_size):
for n in range(num):
height = bar[k][n].get_height()
plt.text(bar[k][n].get_x() + bar[k][n].get_width()/2, height+0.01, '%.2f' %data[k][n], ha='center', va='bottom')
plt.legend(loc = 5)
plt.xlabel('Number of cells')
    plt.ylabel('Average rate (bps/Hz)')
plt.show()
if __name__ == "__main__":
n_x = 5
n_y = 5
N = n_x * n_y # BS number
M = N * maxM # maximum users
W_ = np.ones((M), dtype = dtype) #[M]
Ns = 5e1
with tf.Graph().as_default():
H2 = tf.placeholder(shape = [None, K], dtype = dtype)
P = tf.placeholder(shape = [None], dtype = dtype)
W = tf.placeholder(shape = [None], dtype = dtype)
sigma2 = tf.placeholder(dtype = dtype)
p_array, p_list, user_list = Generate_environment()
rate_matrix, sinr_norm_inv, P_matrix, reward = Calculate_rate()
s = tf.placeholder(shape = [None, state_num], dtype = dtype)
a = tf.placeholder(shape = [None, power_num], dtype = dtype)
y = tf.placeholder(shape = [None], dtype = dtype)
q_targ, a_targ, _, list_targ = Network(s, a, 'targ')
q_main, a_main, r, list_main = Network(s, a, 'main')
cost = Loss(y, r)
train_main, add_global = Optimizer(cost, list_main)
update = Network_update(list_main, list_targ)
with tf.Session() as sess:
tf.global_variables_initializer().run()
train_hist = Train(sess)
io.savemat(hist_file, {'train_hist':train_hist})
    weight = [os.path.join(os.getcwd(), 'saved_networks', 'dqn_00.mat'),
              os.path.join(os.getcwd(), 'saved_networks', 'dqn_01.mat'),
              os.path.join(os.getcwd(), 'saved_networks', 'dqn_03.mat'),
              os.path.join(os.getcwd(), 'saved_networks', 'dqn_07.mat'),
              os.path.join(os.getcwd(), 'saved_networks', 'dqn_09.mat')]
dqn_hist = list()
num = [5,7,10]
for n in num:
n_x = n
n_y = n
N = n_x * n_y # BS number
M = N * maxM # maximum users
W_ = np.ones((M), dtype = dtype) #[M]
        Ns = 4e1 # time slots per test episode (the earlier 1e3 / TEST_EPISODE = 100 setting is overridden)
        TEST_EPISODE = 500
with tf.Graph().as_default():
H2 = tf.placeholder(shape = [None, K], dtype = dtype)
P = tf.placeholder(shape = [None], dtype = dtype)
W = tf.placeholder(shape = [None], dtype = dtype)
sigma2 = tf.placeholder(dtype = dtype)
p_array, p_list, user_list = Generate_environment()
rate_matrix, sinr_norm_inv, P_matrix, reward = Calculate_rate()
s = tf.placeholder(shape = [None, state_num], dtype = dtype)
a = tf.placeholder(shape = [None, power_num], dtype = dtype)
q_main, a_main, r, list_main = Network(s, a, 'main')
dqn_hist_temp = list()
for k in range(5):
load = Network_ini(io.loadmat(weight[k]))
with tf.Session() as sess:
tf.global_variables_initializer().run()
aa = np.mean(Test(sess))
dqn_hist_temp.append(aa)
dqn_hist.append(dqn_hist_temp)
Plot_gamma()
Bar_plot_gamma(dqn_hist)
I am also new to this research area; all of the code above has been run successfully end to end and is still being studied and analyzed. If you are interested, feel free to reach out on QQ (1492768236) to discuss.