Java Enum Types


```java
package com.cyq.Demo2;

import java.util.Arrays;
import java.util.Scanner;

public class Demo1 {

    public static void main(String[] args) {
        System.out.println("Season.values");
        // values() returns every constant of the enum in declaration order
        Season[] arr = Season.values();
        for (Season s : arr) {
            System.out.println(s.name());     // constant name, e.g. SPRING
            System.out.println(s.ordinal());  // declaration index, starting at 0
            System.out.println("--------------");
        }
        System.out.println(Arrays.toString(arr));

        System.out.println("=============================");
        System.out.println("Please enter one of SPRING, SUMMER, AUTUMN, WINTER:");
        Scanner sc = new Scanner(System.in);
        // valueOf(String) maps the text back to the matching enum constant
        Season season = Season.valueOf(sc.next());
        show(season);

        System.out.println("Please enter a number between 0 and 3:");
        int s = Integer.parseInt(sc.next());
        show2(s);
    }

    // switch directly on the enum constant
    public static void show(Season s) {
        switch (s) {
            case SPRING:
                System.out.println("Spring is here!");
                break;
            case SUMMER:
                System.out.println("Summer is here!");
                break;
            case AUTUMN:
                System.out.println("Autumn is here!");
                break;
            case WINTER:
                System.out.println("Winter is here!");
                break;
            default:
                System.out.println("Invalid input!");
        }
    }

    // switch on the ordinal value instead of the constant itself
    public static void show2(int season) {
        switch (season) {
            case 0:
                System.out.println("Spring is here!");
                break;
            case 1:
                System.out.println("Summer is here!");
                break;
            case 2:
                System.out.println("Autumn is here!");
                break;
            case 3:
                System.out.println("Winter is here!");
                break;
            default:
                System.out.println("Invalid input!");
        }
    }
}
```
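
The `Season` enum that `Demo1` uses is not shown in the post. A minimal definition consistent with the usage above (four constants declared in SPRING, SUMMER, AUTUMN, WINTER order, so their ordinals are 0 through 3) would look like this:

```java
package com.cyq.Demo2;

// Minimal sketch of the enum the demo assumes: the declaration order fixes
// the ordinal() values (SPRING = 0 ... WINTER = 3) that show2() relies on.
public enum Season {
    SPRING,
    SUMMER,
    AUTUMN,
    WINTER
}
```

Note that `Season.valueOf(String)` throws an `IllegalArgumentException` when the input does not exactly match a constant name, so a mistyped season makes the program fail before `show()` is even called; the `default` branch in `show()` is only a safety net.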


Below is a reinforcement-learning example implemented in Java: it uses the Q-learning algorithm to train an agent to solve a maze.

```java
import java.util.Arrays;
import java.util.Random;

public class QLearning {

    private static final double ALPHA = 0.1;      // learning rate
    private static final double GAMMA = 0.9;      // discount factor
    private static final double EPSILON = 0.1;    // exploration probability
    private static final int NUM_EPISODES = 100;  // number of training episodes
    private static final int NUM_ACTIONS = 4;     // number of actions
    private static final String[] ACTIONS = {"up", "down", "left", "right"};
    private static final Random RANDOM = new Random();

    public static void main(String[] args) {
        // 1 = open cell, 0 = wall; the bottom corridor (row 9) connects both
        // halves of the maze, so the goal is reachable from the start
        int[][] maze = {
                {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0},
                {0, 1, 1, 1, 1, 1, 0, 1, 1, 1, 0},
                {0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0},
                {0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0},
                {0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0},
                {0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0},
                {0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0},
                {0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0},
                {0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0},
                {0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0},
                {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0},
        };
        int[] startState = {1, 1};
        int[] goalState = {9, 9};

        // Q-table, one entry per (row, column, action); zero-initialised by default
        double[][][] qTable = new double[maze.length][maze[0].length][NUM_ACTIONS];

        // train with Q-learning
        for (int i = 0; i < NUM_EPISODES; i++) {
            int[] state = startState.clone();
            while (!Arrays.equals(state, goalState)) {
                // epsilon-greedy action selection
                int actionIndex;
                if (RANDOM.nextDouble() < EPSILON) {
                    actionIndex = RANDOM.nextInt(NUM_ACTIONS);
                } else {
                    actionIndex = argmax(qTable[state[0]][state[1]]);
                }

                // take the action and compute the reward
                int[] nextState = step(state, actionIndex);
                double reward;
                if (isBlocked(maze, nextState)) {
                    nextState = state;   // blocked moves leave the agent in place
                    reward = -1;         // penalty for bumping into a wall
                } else if (Arrays.equals(nextState, goalState)) {
                    reward = 10;         // reward for reaching the goal
                } else {
                    reward = 0;          // ordinary move
                }

                // Q-learning update
                double[] q = qTable[state[0]][state[1]];
                q[actionIndex] += ALPHA * (reward
                        + GAMMA * max(qTable[nextState[0]][nextState[1]])
                        - q[actionIndex]);

                state = nextState.clone();
            }
        }

        // follow the greedy policy from the start (step cap so the demo always terminates)
        int[] state = startState.clone();
        for (int steps = 0; steps < 200 && !Arrays.equals(state, goalState); steps++) {
            int actionIndex = argmax(qTable[state[0]][state[1]]);
            System.out.println("state: " + Arrays.toString(state) + ", action: " + ACTIONS[actionIndex]);
            int[] nextState = step(state, actionIndex);
            if (!isBlocked(maze, nextState)) {
                state = nextState;
            }
        }
        System.out.println("state: " + Arrays.toString(state));
    }

    // apply an action to a state and return the resulting coordinates
    private static int[] step(int[] state, int actionIndex) {
        switch (ACTIONS[actionIndex]) {
            case "up":    return new int[]{state[0] - 1, state[1]};
            case "down":  return new int[]{state[0] + 1, state[1]};
            case "left":  return new int[]{state[0], state[1] - 1};
            case "right": return new int[]{state[0], state[1] + 1};
            default: throw new IllegalStateException("Unexpected action index: " + actionIndex);
        }
    }

    // true if the cell lies outside the maze or on a wall
    private static boolean isBlocked(int[][] maze, int[] cell) {
        return cell[0] < 0 || cell[0] >= maze.length
                || cell[1] < 0 || cell[1] >= maze[0].length
                || maze[cell[0]][cell[1]] == 0;
    }

    // index of the largest value in the array
    private static int argmax(double[] array) {
        int maxIndex = 0;
        for (int i = 1; i < array.length; i++) {
            if (array[i] > array[maxIndex]) {
                maxIndex = i;
            }
        }
        return maxIndex;
    }

    // largest value in the array
    private static double max(double[] array) {
        double maxValue = array[0];
        for (double value : array) {
            if (value > maxValue) {
                maxValue = value;
            }
        }
        return maxValue;
    }
}
```

The code first defines a maze environment, then trains an agent with Q-learning to navigate it. Finally, it prints the agent's step-by-step path through the maze and the state it ends in.
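The single line that updates `qTable` is the standard Q-learning update rule: Q(s, a) ← Q(s, a) + α · (r + γ · max_a′ Q(s′, a′) − Q(s, a)), where α is the learning rate (`ALPHA`), γ the discount factor (`GAMMA`), r the immediate reward, and s′ the next state. `EPSILON` controls how often the agent takes a random action instead of the greedy one, which is what lets it discover the maze in the first place.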
