强化学习AI对下井字棋

基于 TensorFlow 2.0 做了一个伪强化学习课程作业,可以实现两个人工智能不断对弈,并从中汲取经验,彼此对抗升级。最近事情比较多,直接把源码扔在这里供以后参考吧。

神经网络定义、训练代码实现:

import tensorflow as tf
import numpy as np

def _build_player_net(hidden_units, activation):
    """Assemble one player's network: 9 -> Dense(9) -> hidden stack -> softmax(9)."""
    stack = [tf.keras.layers.Dense(9, input_shape=[9])]
    stack.extend(
        tf.keras.layers.Dense(units, activation=activation)
        for units in hidden_units
    )
    # One output per board cell; softmax turns them into move scores.
    stack.append(tf.keras.layers.Dense(9, activation='softmax'))
    return tf.keras.models.Sequential(stack)


# Player one (the "AI" side): ReLU hidden layers.
model = _build_player_net((30, 20, 20), 'relu')
# Player two (the simulated human opponent): sigmoid hidden layers.
model2 = _build_player_net((30, 60, 30), 'sigmoid')

# Training configuration.
# Labels are move indices (0-8), hence the sparse categorical loss.
model.compile(optimizer='adam',
              loss='sparse_categorical_crossentropy',
              metrics=['accuracy'])
# Player two's network must be compiled as well, otherwise it can never be
# fitted; passing the optimizer by name gives it its own optimizer instance.
model2.compile(optimizer='adam',
               loss='sparse_categorical_crossentropy',
               metrics=['accuracy'])
def transformData(allGame):
    """Return a copy of the recorded boards with the two players swapped.

    Every 0 becomes 1 and every 1 becomes 0, so games recorded from one
    player's point of view can be reused as training data for the other.
    Empty cells (-1) are left untouched, and the input is not modified.

    Args:
        allGame: iterable of boards, each an iterable of cell values.

    Returns:
        A new list of new lists holding the swapped boards.
    """
    swapped = {0: 1, 1: 0}
    return [[swapped.get(cell, cell) for cell in board] for board in allGame]
# Decide whether the game is over and who won.
def winGame(game):
    """Evaluate a 9-cell board.

    Cells hold 1 (player two / human side), 0 (player one / AI side) or
    -1 (empty).

    Returns:
        A pair ``(running, winner)``: ``running`` is 1 while the game can
        continue and 0 once it is finished; ``winner`` is 1 or 0 for the
        side owning a complete line, and -1 for a draw or an unfinished game.
    """
    # Same inspection order as a row-then-column sweep followed by diagonals.
    triples = []
    for k in range(3):
        triples.append((3 * k, 3 * k + 1, 3 * k + 2))  # row k
        triples.append((k, k + 3, k + 6))              # column k
    triples.append((0, 4, 8))
    triples.append((2, 4, 6))

    for a, b, c in triples:
        if game[a] == game[b] == game[c]:
            owner = game[a]
            if owner == 1:
                return 0, 1
            if owner == 0:
                return 0, 0
            # Three empty cells in a line decide nothing; keep looking.

    if game.count(-1) == 0:
        # Board is full and nobody completed a line: draw.
        return 0, -1
    return 1, -1
# Print the current board.
def showGame(game):
    """Print the board as three text rows.

    Empty cells (-1) are shown as ``.``, cells holding 1 as ``o`` and cells
    holding 0 as ``x``; any other value contributes nothing to the row.
    """
    glyphs = {-1: " . ", 1: " o ", 0: " x "}
    for row_start in (0, 3, 6):
        cells = game[row_start:row_start + 3]
        print("".join(glyphs.get(cell, "") for cell in cells))

# Learning step for player one.
def ML_Fit(allGame,allAction):
    """Fit player one's network on recorded (board, move) pairs.

    Args:
        allGame: list of boards (9 cell values each).
        allAction: list of the move index chosen for each board.

    Returns:
        The index with the highest score that the freshly trained network
        assigns for the module-level ``game`` board.
    """
    model.fit(np.array(allGame), np.array(allAction), epochs=200, verbose=0)
    # NOTE: `game` is not a parameter; it is read from module scope, so this
    # only works once a global `game` board exists.
    scores = model.predict(np.array([game]))
    return np.argmax(scores)

# Learning step for player two.
def ML_Fit_self(allGame,allAction):
    """Fit player two's network on recorded (board, move) pairs.

    Trains ``model2`` (the network that ``AI_self`` queries) rather than
    player one's ``model``.

    Args:
        allGame: list of boards (9 cell values each).
        allAction: list of the move index chosen for each board.

    Returns:
        The index with the highest score that ``model2`` assigns for the
        module-level ``game`` board.
    """
    # model2 may never have been compiled elsewhere in the file; fitting an
    # uncompiled Keras model raises, so compile it on first use.
    if getattr(model2, "optimizer", None) is None:
        model2.compile(optimizer='adam',
                       loss='sparse_categorical_crossentropy',
                       metrics=['accuracy'])
    a_game=np.array(allGame)
    a_action=np.array(allAction)
    model2.fit(a_game,a_action,epochs=200,verbose=0)
    # NOTE: `game` is read from module scope, not passed in.
    prediction=model2.predict(np.array([game]))
    return np.argmax(prediction)

# Player one choosing a move in a live game.
def AI(game):
    """Pick player one's (mark 0) next cell on ``game``.

    Order of preference: a cell that wins immediately for mark 0, then a
    cell that would let mark 1 win immediately (to block it), and otherwise
    the highest-scoring empty cell according to ``model``.
    """
    # First pass tries our own mark (0), second pass the opponent's (1).
    for mark in (0, 1):
        for cell in range(9):
            if game[cell] != -1:
                continue
            trial = game.copy()
            trial[cell] = mark
            if winGame(trial)[1] == mark:
                return cell

    scores = model.predict(np.array([game]))
    choice = np.argmax(scores)
    # Knock out occupied cells one by one until the best score is on an
    # empty cell.
    while game[choice] != -1:
        scores[0][choice] = -1
        choice = np.argmax(scores)
    return choice

# Player two choosing a move in a live game.
def AI_self(game):
    """Pick player two's (mark 1) next cell on ``game``.

    Order of preference: a cell that wins immediately for mark 1, then a
    cell that would let mark 0 win immediately (to block it), and otherwise
    the highest-scoring empty cell according to ``model2``.
    """
    # No beginner mistakes: take a one-move win first (mark 1), and if there
    # is none, block the opponent's one-move win (mark 0).
    for mark in (1, 0):
        for cell in range(9):
            if game[cell] != -1:
                continue
            trial = game.copy()
            trial[cell] = mark
            if winGame(trial)[1] == mark:
                return cell

    scores = model2.predict(np.array([game]))
    choice = np.argmax(scores)
    # An occupied cell cannot be played: discard it and take the next best.
    while game[choice] != -1:
        scores[0][choice] = -1
        choice = np.argmax(scores)
    return choice

两个AI对弈对抗训练代码实现:

# Self-play training: player two (mark 1) moves first, player one (mark 0)
# replies, and whichever side did well has its moves added to its own
# training set.
epochs=200  # total number of games to play
winTime=0  # games won by player one
winTime_self=0  # games won by player two
noWinner=0  # drawn games
i=0
# Training material for player one.
all_allGame=[]  # boards as seen before each recorded move
all_allAction=[]  # the move chosen on each of those boards
# Training material for player two.
all_allGame_self=[]
all_allAction_self=[]

while i<epochs:  # loop over total games (not over wins)
    i+=1
    # Fresh board and per-game records.
    game=[-1 for _ in range(9)]
    allGame=[]
    allAction=[]
    allGame_self=[]
    allAction_self=[]
    while winGame(game)[0]:
        # Keep the replay buffers bounded by dropping the oldest sample.
        if len(all_allGame)>100:
            all_allGame.pop(0)
            all_allAction.pop(0)
            print("删除旧样本")
        if len(all_allGame_self)>100:
            print("删除旧样本")
            all_allGame_self.pop(0)
            all_allAction_self.pop(0)
        m=AI_self(game)
        if game[m]!=-1:
            print("已有落子!请重下")
            continue
        # Record a snapshot of the board *before* the move. Appending `game`
        # itself would store a reference that later moves keep mutating, so
        # every sample would end up equal to the final board.
        allGame_self.append(game.copy())
        allAction_self.append(m)
        game[m]=1
        if winGame(game)[0]==0:
            break
        n=AI(game)
        allGame.append(game.copy())
        allAction.append(n)
        game[n]=0

    result=winGame(game)[1]
    if result!=1:
        # Player one (moving second) won or drew: learn from its moves.
        all_allGame+=allGame
        all_allAction+=allAction
        if result==0:
            winTime+=1
            print("AI获胜!")
        ML_Fit(all_allGame,all_allAction)
    if result==1:
        # Player two (moving first) won: both networks learn from its moves,
        # player one from the colour-swapped boards.
        winTime_self+=1
        all_allGame_self+=allGame_self
        all_allAction_self+=allAction_self
        ML_Fit(transformData(all_allGame_self),all_allAction_self)
        ML_Fit_self(all_allGame_self,all_allAction_self)
    if result==-1:
        noWinner+=1
        print("平局")
    print("当前盘数:",i)
# Report the final tallies.
print("AI获胜次数:",winTime,"模拟AI获胜次数:",winTime_self,"平局次数:",noWinner)
# Audible cue that training is finished. winsound exists only on Windows,
# so skip the sound elsewhere instead of crashing after a long training run.
try:
    import winsound
except ImportError:
    winsound = None
if winsound is not None:
    winsound.PlaySound("SystemHand", winsound.SND_ALIAS)

玩家检验训练成果:

玩家先行:

# Human (mark 1) moves first against player one's network (mark 0).
allGame=[]
allAction=[]
game=[-1 for _ in range(9)]
while winGame(game)[0]:
    # Read a cell index 0-8; reject anything that is not a free, valid cell
    # instead of crashing or silently wrapping a negative index.
    try:
        m=int(input())
    except ValueError:
        print("请输入0到8之间的整数")
        continue
    if not 0<=m<9:
        print("请输入0到8之间的整数")
        continue
    if game[m]!=-1:
        print("已有落子!请重下")
        continue
    print("玩家下")
    game[m]=1
    showGame(game)
    print("------------")
    if winGame(game)[0]==0:
        break
    print("AI下")
    n=AI(game)
    game[n]=0
    showGame(game)
# Announce the result.
outcome=winGame(game)[1]
if outcome==1:
    print("玩家获胜")
if outcome==0:
    print("AI获胜")
if outcome==-1:
    print("平局!")

玩家后手:

# Player two's network (mark 1) moves first; the human replies with mark 0.

game=[-1 for _ in range(9)]
while winGame(game)[0]:
    print("AI_self下")
    n=AI_self(game)
    game[n]=1
    showGame(game)
    if winGame(game)[0]==0:
        break
    print("------------")
    # Keep asking until the human names a free cell in 0-8. The "occupied"
    # message is shown before the next read, not after it.
    while True:
        try:
            m=int(input())
        except ValueError:
            print("请输入0到8之间的整数")
            continue
        if not 0<=m<9:
            print("请输入0到8之间的整数")
            continue
        if game[m]!=-1:
            print("已有落子!请重下")
            continue
        break
    print("玩家下")
    game[m]=0
    showGame(game)
# Announce the result.
outcome=winGame(game)[1]
if outcome==1:
    print("AI_self获胜")
if outcome==0:
    print("玩家获胜")
if outcome==-1:
    print("平局!")

  • 1
    点赞
  • 1
    收藏
    觉得还不错? 一键收藏
  • 0
    评论
以下是一个简单的强化学习训练井字棋Python代码示例: ```python import numpy as np # 初始化游戏棋盘 board = np.zeros((3, 3)) # 定义玩家和AI的棋子类型 player_piece = 1 ai_piece = -1 # 定义游戏结束的条件 def game_over(board): # 判断是否有一方连成三个棋子 for i in range(3): if sum(board[i, :]) == 3 or sum(board[:, i]) == 3: return True if board[0, 0] + board[1, 1] + board[2, 2] == 3 or board[0, 2] + board[1, 1] + board[2, 0] == 3: return True # 判断是否已经下满了所有的格子 if np.all((board == 0) == False): return True return False # 定义玩家的落子函数 def player_move(board): while True: row = int(input("请输入要下的行数(1~3):")) - 1 col = int(input("请输入要下的列数(1~3):")) - 1 if row >= 0 and row < 3 and col >= 0 and col < 3 and board[row, col] == 0: board[row, col] = player_piece break # 定义AI的落子函数 def ai_move(board): best_score = -np.inf best_move = None for i in range(3): for j in range(3): if board[i, j] == 0: board[i, j] = ai_piece score = minimax(board, 0, False) board[i, j] = 0 if score > best_score: best_score = score best_move = (i, j) board[best_move[0], best_move[1]] = ai_piece # 定义minimax算法 def minimax(board, depth, is_maximizing): if game_over(board): if is_maximizing: return -1 else: return 1 if is_maximizing: best_score = -np.inf for i in range(3): for j in range(3): if board[i, j] == 0: board[i, j] = ai_piece score = minimax(board, depth + 1, False) board[i, j] = 0 best_score = max(best_score, score) return best_score else: best_score = np.inf for i in range(3): for j in range(3): if board[i, j] == 0: board[i, j] = player_piece score = minimax(board, depth + 1, True) board[i, j] = 0 best_score = min(best_score, score) return best_score # 开始游戏 while not game_over(board): player_move(board) if game_over(board): break ai_move(board) print(board) # 判断胜负 if sum(board[0, :]) == 3 or sum(board[1, :]) == 3 or sum(board[2, :]) == 3: print("你赢了!") elif sum(board[:, 0]) == 3 or sum(board[:, 1]) == 3 or sum(board[:, 2]) == 3: print("你赢了!") elif board[0, 0] + board[1, 1] + board[2, 2] == 3 or board[0, 2] + board[1, 1] + board[2, 0] == 3: print("你赢了!") elif 
sum(board[0, :]) == -3 or sum(board[1, :]) == -3 or sum(board[2, :]) == -3: print("你输了!") elif sum(board[:, 0]) == -3 or sum(board[:, 1]) == -3 or sum(board[:, 2]) == -3: print("你输了!") elif board[0, 0] + board[1, 1] + board[2, 2] == -3 or board[0, 2] + board[1, 1] + board[2, 0] == -3: print("你输了!") else: print("平局!") ``` 这个代码使用了minimax算法来实现AI的决策。minimax算法是一种递归算法,会考虑所有可能的落子情况,并计算每种情况下的得分,然后选择得分最优的落子。由于井字棋的状态空间相对较小,因此使用minimax算法是可行的。在实际的应用中,可能需要使用更复杂的算法来处理更大的状态空间。
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值