强化学习系列文章(二十五):利用遗传规划算法解决CartPole问题

强化学习系列文章(二十五):利用遗传规划算法解决CartPole问题

主程序

import gym
import numpy as np
import cgp
from cgp import *
import matplotlib.pyplot as plt

n_eps = 300
Mu = 10
Lambda = 40

env = gym.make('CartPole-v1')
env = env.unwrapped

r_list = []
pop = create_population(Mu+Lambda)
for e in range(n_eps):
    for ind in pop:
        obs = env.reset()
        done = False
        reward = 0
        while not done:
            x, xx, w, ww = obs
            action = 0 if ind.eval(x, xx, w, ww) > 0 else 1
            obs,r,done,_ = env.step(action)
            reward += r
            if reward >= 1500: break
        ind.fitness = reward
        
    pop = evolve(pop, 0.015, Mu, Lambda)
    print(e,'\t',pop[0].fitness)
    r_list.append(pop[0].fitness)

plt.plot(r_list)
plt.show()

遗传规划cgp.py

"""
Cartesian genetic programming
"""
import operator as op
import random
import copy
import math
from settings import VERBOSE, N_COLS, LEVEL_BACK


class Function:
    """
    A general function
    arity: 函数的输入参数的数量
    """

    def __init__(self, f, arity, name=None):
        self.f = f
        self.arity = arity
        self.name = f.__name__ if name is None else name

    def __call__(self, *args, **kwargs):
        return self.f(*args, **kwargs)


class Node:
    """
    A node in CGP graph
    """
    def __init__(self, max_arity):
        """
        Initialize this node randomly
        """
        self.i_func = None
        self.i_inputs = [None] * max_arity
        self.weights = [None] * max_arity
        self.i_output = None
        self.output = None
        self.active = False


class Individual:
    """
    An individual (chromosome, genotype, etc.) in evolution
    
    """
    function_set = None
    weight_range = [-1, 1]
    max_arity = 3
    n_inputs = 3
    n_outputs = 1
    n_cols = N_COLS # number of cols (nodes) in a single-row CGP
    level_back = LEVEL_BACK # 后面的节点可以最远连接的前面节点的相对位置
    fitness = None

    def __init__(self):
        self.nodes = []
        for pos in range(self.n_cols):
            self.nodes.append(self._create_random_node(pos))
        
        for i in range(1, self.n_outputs + 1):
            self.nodes[-i].active = True
        self.fitness = None
        self._active_determined = False

    def _create_random_node(self, pos):
        node = Node(self.max_arity)
        node.i_func = random.randint(0, len(self.function_set) - 1)
        for i in range(self.function_set[node.i_func].arity):
            # 确定节点的每个输入连接的是前面哪个节点(column)的输出
            node.i_inputs[i] = random.randint(max(pos - self.level_back, -self.n_inputs), pos - 1)
            node.weights[i] = random.uniform(self.weight_range[0], self.weight_range[1])
        node.i_output = pos

        return node

    def _determine_active_nodes(self):
        """
        Determine which nodes in the CGP graph are active
        """
        # check each node in reverse order
        n_active = 0
        for node in reversed(self.nodes):
            if node.active:
                n_active += 1
                for i in range(self.function_set[node.i_func].arity):
                    i_input = node.i_inputs[i]
                    if i_input >= 0:  # a node (not an input)
                        self.nodes[i_input].active = True
        if VERBOSE:
            print("# active genes: ", n_active)

    def eval(self, *args):
        """
        Given inputs, evaluate the output of this CGP individual.
        :return the final output value
        """
        if not self._active_determined:
            self._determine_active_nodes()
            self._active_determined = True
        # forward pass: evaluate
        for node in self.nodes:
            if node.active:
                inputs = []
                for i in range(self.function_set[node.i_func].arity):
                    i_input = node.i_inputs[i]
                    w = node.weights[i]
                    if i_input < 0:
                        inputs.append(args[-i_input - 1] * w)
                    else:
                        inputs.append(self.nodes[i_input].output * w)
                node.output = self.function_set[node.i_func](*inputs)
        return self.nodes[-1].output

    def mutate(self, mut_rate=0.01):
        """
        Mutate this individual. Each gene is varied with probability *mut_rate*.
        :param mut_rate: mutation probability
        :return a child after mutation
        """
        child = copy.deepcopy(self)
        for pos, node in enumerate(child.nodes):
            # mutate the function gene
            if random.random() < mut_rate:
                node.i_func = random.choice(range(len(self.function_set)))
            # mutate the input genes (connection genes)
            arity = self.function_set[node.i_func].arity
            for i in range(arity):
                if node.i_inputs[i] is None or random.random() < mut_rate:  # if the mutated function requires more arguments, then the last ones are None 
                    node.i_inputs[i] = random.randint(max(pos - self.level_back, -self.n_inputs), pos - 1)
                if node.weights[i] is None or random.random() < mut_rate:
                    node.weights[i] = random.uniform(self.weight_range[0], self.weight_range[1])
            # initially an individual is not active except hte last output node
            node.active = False
        for i in range(1, self.n_outputs + 1):
            child.nodes[-i].active = True
        child.fitness = None
        child._active_determined = False
        return child


# function set
def protected_div(a, b):
    if abs(b) < 1e-6:
        return a
    return a / b

fs = [
        Function(op.add, 2), 
        Function(op.sub, 2), 
        Function(op.mul, 2), 
        Function(protected_div, 2), 
        Function(op.neg, 1)
    ]
Individual.function_set = fs
Individual.max_arity = max(f.arity for f in fs)


def evolve(pop, mut_rate, mu, lambda_):
    """
    Evolve the population *pop* using the mu + lambda evolutionary strategy

    :param pop: a list of individuals, whose size is mu + lambda. The first mu ones are previous parents.
    :param mut_rate: mutation rate
    :return: a new generation of individuals of the same size
    """
    pop = sorted(pop, key=lambda ind: ind.fitness)  # stable sorting
    parents = pop[-mu:]
    # generate lambda new children via mutation
    offspring = []
    for _ in range(lambda_):
        parent = random.choice(parents)
        offspring.append(parent.mutate(mut_rate))
    return parents + offspring


def create_population(n):
    """
    Create a random population composed of n individuals.
    """
    return [Individual() for _ in range(n)]

cgp.py来自https://github.com/ShuhuaGao/gpFlappyBird

实验效果如下图,横轴episode,纵轴reward,限制最大reward 1500。
在这里插入图片描述

  • 0
    点赞
  • 1
    收藏
    觉得还不错? 一键收藏
  • 1
    评论

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论 1
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值