强化学习系列文章(二十五):利用遗传规划算法解决CartPole问题
主程序
import gym
import numpy as np
import cgp
from cgp import *
import matplotlib.pyplot as plt
n_eps = 300
Mu = 10
Lambda = 40
env = gym.make('CartPole-v1')
env = env.unwrapped
r_list = []
pop = create_population(Mu+Lambda)
for e in range(n_eps):
for ind in pop:
obs = env.reset()
done = False
reward = 0
while not done:
x, xx, w, ww = obs
action = 0 if ind.eval(x, xx, w, ww) > 0 else 1
obs,r,done,_ = env.step(action)
reward += r
if reward >= 1500: break
ind.fitness = reward
pop = evolve(pop, 0.015, Mu, Lambda)
print(e,'\t',pop[0].fitness)
r_list.append(pop[0].fitness)
plt.plot(r_list)
plt.show()
遗传规划cgp.py
"""
Cartesian genetic programming
"""
import operator as op
import random
import copy
import math
from settings import VERBOSE, N_COLS, LEVEL_BACK
class Function:
"""
A general function
arity: 函数的输入参数的数量
"""
def __init__(self, f, arity, name=None):
self.f = f
self.arity = arity
self.name = f.__name__ if name is None else name
def __call__(self, *args, **kwargs):
return self.f(*args, **kwargs)
class Node:
"""
A node in CGP graph
"""
def __init__(self, max_arity):
"""
Initialize this node randomly
"""
self.i_func = None
self.i_inputs = [None] * max_arity
self.weights = [None] * max_arity
self.i_output = None
self.output = None
self.active = False
class Individual:
"""
An individual (chromosome, genotype, etc.) in evolution
"""
function_set = None
weight_range = [-1, 1]
max_arity = 3
n_inputs = 3
n_outputs = 1
n_cols = N_COLS # number of cols (nodes) in a single-row CGP
level_back = LEVEL_BACK # 后面的节点可以最远连接的前面节点的相对位置
fitness = None
def __init__(self):
self.nodes = []
for pos in range(self.n_cols):
self.nodes.append(self._create_random_node(pos))
for i in range(1, self.n_outputs + 1):
self.nodes[-i].active = True
self.fitness = None
self._active_determined = False
def _create_random_node(self, pos):
node = Node(self.max_arity)
node.i_func = random.randint(0, len(self.function_set) - 1)
for i in range(self.function_set[node.i_func].arity):
# 确定节点的每个输入连接的是前面哪个节点(column)的输出
node.i_inputs[i] = random.randint(max(pos - self.level_back, -self.n_inputs), pos - 1)
node.weights[i] = random.uniform(self.weight_range[0], self.weight_range[1])
node.i_output = pos
return node
def _determine_active_nodes(self):
"""
Determine which nodes in the CGP graph are active
"""
# check each node in reverse order
n_active = 0
for node in reversed(self.nodes):
if node.active:
n_active += 1
for i in range(self.function_set[node.i_func].arity):
i_input = node.i_inputs[i]
if i_input >= 0: # a node (not an input)
self.nodes[i_input].active = True
if VERBOSE:
print("# active genes: ", n_active)
def eval(self, *args):
"""
Given inputs, evaluate the output of this CGP individual.
:return the final output value
"""
if not self._active_determined:
self._determine_active_nodes()
self._active_determined = True
# forward pass: evaluate
for node in self.nodes:
if node.active:
inputs = []
for i in range(self.function_set[node.i_func].arity):
i_input = node.i_inputs[i]
w = node.weights[i]
if i_input < 0:
inputs.append(args[-i_input - 1] * w)
else:
inputs.append(self.nodes[i_input].output * w)
node.output = self.function_set[node.i_func](*inputs)
return self.nodes[-1].output
def mutate(self, mut_rate=0.01):
"""
Mutate this individual. Each gene is varied with probability *mut_rate*.
:param mut_rate: mutation probability
:return a child after mutation
"""
child = copy.deepcopy(self)
for pos, node in enumerate(child.nodes):
# mutate the function gene
if random.random() < mut_rate:
node.i_func = random.choice(range(len(self.function_set)))
# mutate the input genes (connection genes)
arity = self.function_set[node.i_func].arity
for i in range(arity):
if node.i_inputs[i] is None or random.random() < mut_rate: # if the mutated function requires more arguments, then the last ones are None
node.i_inputs[i] = random.randint(max(pos - self.level_back, -self.n_inputs), pos - 1)
if node.weights[i] is None or random.random() < mut_rate:
node.weights[i] = random.uniform(self.weight_range[0], self.weight_range[1])
# initially an individual is not active except hte last output node
node.active = False
for i in range(1, self.n_outputs + 1):
child.nodes[-i].active = True
child.fitness = None
child._active_determined = False
return child
# function set
def protected_div(a, b):
if abs(b) < 1e-6:
return a
return a / b
fs = [
Function(op.add, 2),
Function(op.sub, 2),
Function(op.mul, 2),
Function(protected_div, 2),
Function(op.neg, 1)
]
Individual.function_set = fs
Individual.max_arity = max(f.arity for f in fs)
def evolve(pop, mut_rate, mu, lambda_):
"""
Evolve the population *pop* using the mu + lambda evolutionary strategy
:param pop: a list of individuals, whose size is mu + lambda. The first mu ones are previous parents.
:param mut_rate: mutation rate
:return: a new generation of individuals of the same size
"""
pop = sorted(pop, key=lambda ind: ind.fitness) # stable sorting
parents = pop[-mu:]
# generate lambda new children via mutation
offspring = []
for _ in range(lambda_):
parent = random.choice(parents)
offspring.append(parent.mutate(mut_rate))
return parents + offspring
def create_population(n):
"""
Create a random population composed of n individuals.
"""
return [Individual() for _ in range(n)]
cgp.py来自https://github.com/ShuhuaGao/gpFlappyBird
实验效果如下图,横轴episode,纵轴reward,限制最大reward 1500。