前言
本科毕设的时候写了一些python代码,里面有基本的python变量定义方法,记录一下。
power_sit
import numpy as np
from scipy.spatial.distance import pdist,squareform
import matplotlib.pyplot as plt
class power_situation:
    """Multi-cell power-control environment with one user per base station.

    Nine base stations sit on a fixed 3x3 grid and each serves the user with
    the same index, placed at random within +/-50 units of that station on
    each axis.  The channel gain between user ``i`` and station ``j`` is
    ``distance[i, j] ** -4``.  For every station the environment tracks the
    serving gain, transmit power, received interference, SINR and capacity
    ``B * ln(1 + SINR)``, both for the current step (``now_*``) and for the
    previous one (``last_*``).  All of these are arrays of shape
    ``(1, expectBS)``.

    An observation is an ``(expectBS, 7)`` array whose columns are: previous
    gain, previous power, previous interference, previous SINR, previous
    capacity, current gain, current interference.
    """

    def __init__(self):
        """Set the constants and allocate zeroed state; call ``reset`` next."""
        self.expectBS = 9  # number of base stations
        self.UE_num = 9  # number of users
        self.N0 = 1e-9  # noise term added to the interference in the SINR
        self.pmax = 1  # maximum transmit power (the original note says dBm)
        self.B = 1  # multiplier applied to ln(1 + SINR)
        self.MAX_velocity = 1  # maximum user speed (not used by this class)

        self.all_distance = []  # pairwise distances between all nodes
        self.BS_UE_distance = []  # user (row) to base station (column)
        self.BS_BS_distance = []  # base station to base station
        self.BS_location = self.get_bs_location()  # base-station coordinates
        # Misspelled name from the original code, kept so that anything that
        # still reads it does not break.
        self.BS_loaction = []
        self.UE_location = []  # user coordinates, filled in by reset()
        self.UE_move_direction = np.zeros((self.UE_num,))
        self.UE_move_velocity = np.zeros((self.UE_num,))

        n = self.expectBS
        self.now_gnn_of_all = np.zeros((1, n))
        self.last_gnn_of_all = np.zeros((1, n))
        self.now_interference = np.zeros((1, n))
        self.last_interference = np.zeros((1, n))
        self.now_BS_power = self.pmax * np.random.rand(1, n)
        self.last_BS_power = np.zeros((1, n))
        self.now_SINR = np.zeros((1, n))
        self.last_SINR = np.zeros((1, n))
        self.now_C = np.zeros((1, n))
        self.last_C = np.zeros((1, n))

    def reset(self):
        """Re-draw the user positions and return the initial observation.

        The transmit powers are left as they are.  The "previous" values are
        initialised from the freshly computed current ones, so the first
        observation has identical previous/current gain and interference.

        Returns:
            Array of shape ``(expectBS, 7)``.
        """
        n = self.expectBS
        self.BS_location = self.get_bs_location()
        # One user per base station, hence expectBS rather than UE_num.
        self.UE_location = self.get_ue_location(n)
        self.BS_UE_location = np.vstack((self.BS_location, self.UE_location))
        self.all_distance = squareform(
            pdist(self.BS_UE_location, 'euclidean'))
        # Rows n.. of the stacked matrix are users, columns ..n are stations.
        self.BS_UE_distance = self.all_distance[n:, :n]
        self.BS_BS_distance = self.all_distance[:n, :n]

        self.now_gnn_of_all = self.compute_gnn_of__all()
        self.now_interference = self.compute_interference_of_one_UE()
        self.now_SINR = self.compute_SINR()
        self.now_C = self.compute_C()
        self._roll_now_into_last()
        return self._build_observation()

    def get_bs_location(self):
        """Return the fixed 3x3 base-station grid as a ``(9, 2)`` array."""
        locationx = np.array([100, 150, 200, 100, 150, 200, 100, 150, 200])
        locationy = np.array([100, 100, 100, 150, 150, 150, 200, 200, 200])
        return np.column_stack((locationx, locationy))

    def get_ue_location(self, expect_bs):
        """Draw one user position around each of the first base stations.

        Args:
            expect_bs: number of users to place; user ``i`` is offset from
                base station ``i`` by a uniform value in [-50, 50) per axis.

        Returns:
            Array of shape ``(expect_bs, 2)`` with x/y coordinates.
        """
        # Row 0 holds the x offsets and row 1 the y offsets.
        offsets = 100 * (np.random.rand(2, expect_bs) - 0.5)
        anchors = np.asarray(self.BS_location, dtype=float)[:expect_bs]
        return anchors + offsets.T

    def get_observe(self):
        """Return the observation for the currently stored state.

        Returns:
            Array of shape ``(expectBS, 7)``; see the class docstring for the
            column layout.
        """
        return self._build_observation()

    def compute_gnn_of__all(self):
        """Return each station's gain to its own user, shape ``(1, expectBS)``."""
        n = self.expectBS
        return np.diag(self._path_gain()).reshape(1, n).copy()

    def compute_interference_of_one_UE(self):
        """Return the interference power seen by every user.

        For user ``i`` this is the sum over all other stations ``j`` of
        ``gain[i, j] * now_BS_power[0, j]``.

        Returns:
            Array of shape ``(1, expectBS)``.
        """
        n = self.expectBS
        cross_gain = self._path_gain()
        # Zero the serving link instead of subtracting it afterwards: the
        # serving gain can be far larger than the cross gains, and a
        # subtraction would cancel away the small terms.
        np.fill_diagonal(cross_gain, 0.0)
        power = np.asarray(self.now_BS_power, dtype=float)[0, :n]
        return (cross_gain @ power).reshape(1, n)

    def compute_SINR(self):
        """Return ``gain * power / (N0 + interference)``, shape ``(1, expectBS)``."""
        signal = (np.asarray(self.now_gnn_of_all, dtype=float)
                  * np.asarray(self.now_BS_power, dtype=float))
        return signal / (self.N0 + np.asarray(self.now_interference,
                                              dtype=float))

    def compute_C(self):
        """Return the capacity ``B * ln(1 + SINR)``, shape ``(1, expectBS)``."""
        return self.B * np.log1p(np.asarray(self.now_SINR, dtype=float))

    def step(self, action):
        """Apply new transmit powers and advance the environment one step.

        Args:
            action: array-like indexed as ``action[i, 0]``, giving the new
                power of base station ``i``.

        Returns:
            Tuple ``(observation, reward)``.  ``observation`` has shape
            ``(expectBS, 7)`` and combines the previous step's quantities
            with the gain and interference produced by ``action``.
            ``reward`` has shape ``(expectBS,)`` and every entry is the sum
            of all capacities after the action.
        """
        n = self.expectBS
        # A new array, so the stored previous power is never overwritten.
        chosen = np.asarray(action, dtype=float)[:n, 0]
        self.now_BS_power = chosen.reshape(1, n).copy()

        self.now_gnn_of_all = self.compute_gnn_of__all()
        self.now_interference = self.compute_interference_of_one_UE()
        # The observation is taken before the "previous" values are rolled.
        o_n_next_return = self._build_observation()

        self.now_SINR = self.compute_SINR()
        self.now_C = self.compute_C()
        self._roll_now_into_last()

        r_n = np.full(n, np.sum(self.now_C))
        return o_n_next_return, r_n

    def plot_history_of_cnk(self, action, reward, max_allocate_reward):
        """Plot the power history and the reward history side by side.

        Args:
            action: 2-D array, one column per base station.
            reward: 2-D array; column 0 is drawn in red.
            max_allocate_reward: 2-D array; column 0 is drawn in blue.
        """
        plt.subplot(221)
        # One curve per column using the default colour cycle, so no curve
        # is drawn in white and the column count is not fixed at nine.
        for column in range(action.shape[1]):
            plt.plot(action[:, column])
        plt.title('action of bs')
        plt.subplot(222)
        plt.plot(reward[:, 0], 'r')
        plt.plot(max_allocate_reward[:, 0], 'b')
        plt.title('real_reward(r) and max_reward')
        plt.show()

    def compute_max_allocation_C(self):
        """Return the total capacity when every station transmits at ``pmax``.

        This is the all-stations-at-full-power baseline.  It uses the stored
        ``now_gnn_of_all`` for the serving gain and does not change any state.

        Returns:
            Scalar sum of ``B * ln(1 + SINR)`` over all stations.
        """
        n = self.expectBS
        cross_gain = self._path_gain()
        np.fill_diagonal(cross_gain, 0.0)
        interference = cross_gain.sum(axis=1) * self.pmax
        serving = np.asarray(self.now_gnn_of_all, dtype=float)[0, :n]
        sinr = serving * self.pmax / (self.N0 + interference)
        return np.sum(self.B * np.log1p(sinr))

    def _path_gain(self):
        """Return a fresh ``(expectBS, expectBS)`` matrix of ``distance ** -4``."""
        n = self.expectBS
        distance = np.asarray(self.BS_UE_distance, dtype=float)[:n, :n]
        return distance ** -4.0

    def _roll_now_into_last(self):
        """Store independent copies of the current values as the previous ones."""
        self.last_gnn_of_all = np.array(self.now_gnn_of_all, dtype=float)
        self.last_BS_power = np.array(self.now_BS_power, dtype=float)
        self.last_interference = np.array(self.now_interference, dtype=float)
        self.last_SINR = np.array(self.now_SINR, dtype=float)
        self.last_C = np.array(self.now_C, dtype=float)

    def _build_observation(self):
        """Assemble the ``(expectBS, 7)`` observation from the stored state."""
        n = self.expectBS
        columns = (
            self.last_gnn_of_all,
            self.last_BS_power,
            self.last_interference,
            self.last_SINR,
            self.last_C,
            self.now_gnn_of_all,
            self.now_interference,
        )
        return np.column_stack(
            [np.asarray(column, dtype=float)[0, :n] for column in columns])
设置中文字符
import matplotlib.pyplot as plt

# Font configuration so that figures can show Chinese text.
plt.rcParams.update({
    'font.sans-serif': ['SimHei'],  # font used to display Chinese characters
    'axes.unicode_minus': False,  # keeps the minus sign '-' from showing as a box
})
exec语句
# For every user index i, run the generated statement
# "I_buffer_a_<i>.append(I_a_<i>)", i.e. append the variable I_a_<i> to the
# list I_buffer_a_<i>.  Both families of numbered variables must already exist.
for i in range(UE_number):
    exec(f'I_buffer_a_{i}.append(I_a_{i})')
Json文件创建
# train1 is a Python object; indent sets the indentation width of the output.
# NOTE: json.dumps only returns the JSON text as a str; to create a file the
# string still has to be written out (or json.dump used with a file object).
json.dumps(train1,indent=4)
#模型保存
saver 的创建尽可能放在后面(所有网络变量和优化器都定义完之后),因为 Saver() 默认只收集创建时已经存在的变量,之后才创建的变量不会被保存。
# Saver used by save_/restore_ to checkpoint the model.
# NOTE(review): Saver() without var_list collects the saveable objects that
# exist when it is constructed, so the variables created by the two
# minimize() calls below are presumably not covered - confirm, and consider
# constructing the saver after the optimizers.
self.saver = tf.compat.v1.train.Saver()
# Critic training op: Adam with learning rate C_LR minimising self.closs.
self.ctrain_op = tf.train.AdamOptimizer(C_LR).minimize(self.closs)
with tf.variable_scope('atrain'):
# Optimise the actor network: Adam with learning rate A_LR minimising self.aloss.
self.atrain_op = tf.train.AdamOptimizer(A_LR).minimize(self.aloss)
def save_(self, name):
    """Save the session through ``self.saver`` under ``./参数保存/<name>/``.

    The checkpoint prefix is ``./参数保存/<name>/<name>.ckpt``; the directory
    is created when it does not exist yet.

    Args:
        name: used both as the sub-directory name and as the checkpoint
            file stem.
    """
    directory = os.path.join('./参数保存', name)
    # exist_ok replaces the separate exists() check, so a directory created
    # by someone else between check and creation no longer raises.
    os.makedirs(directory, exist_ok=True)
    self.saver.save(self.sess, os.path.join(directory, name + '.ckpt'))
def restore_(self, name):
    """Restore the session from ``./参数保存/<name>/<name>.ckpt`` if present.

    The checkpoint counts as present when its ``.index`` file exists.  A
    message is printed in both cases.  An alternative that was tried here is
    to look the prefix up with ``tf.train.latest_checkpoint(directory)``
    instead of building it from ``name``.

    Args:
        name: sub-directory name and checkpoint file stem, as used by
            ``save_``.

    Returns:
        True when the checkpoint was found and restored, False when no
        checkpoint file exists (nothing is restored in that case).
    """
    checkpoint_path = os.path.join('./参数保存', name, name + '.ckpt')
    if not os.path.exists(checkpoint_path + '.index'):
        print("No checkpoint file found.")
        return False
    self.saver.restore(self.sess, checkpoint_path)
    print(f"Model restored from {checkpoint_path}")
    return True
强化学习训练技巧
1.对于actor critic网络,适当时候固定critic网络 有助于网络的训练
2.actor网络设计要简单 ,critic网络可以趋于复杂,注意初始层的输入范围。
其他经验
1.不要随便改动代码的游戏版本 (V0->V1),很有可能使得代码错误。此时应当降级gym。
2.tensorflow模型从tf.Module继承 可以使用很多方法
3.保存和加载模型 可以有很大的用处 latest_checkpoint很关键
4.如果不需要过程量,训练测试要完全分开 ,对训练完成的模型进行测试。
论文公式经验
1.上下标 – 双栏合一