ADD file via upload

This commit is contained in:
p74035216 2023-11-22 23:52:11 +08:00
parent 5bcc4ece38
commit 072c3de021
1 changed files with 109 additions and 0 deletions

109
q_learning.py Normal file
View File

@ -0,0 +1,109 @@
import numpy as np
import pygame
from time import time, sleep
from random import randint
import random
# parameters of maze
frame_interval = 0.1 # control run speed: seconds slept between frames
n = 4 # size of maze: the grid is n x n cells
penalities = 4 # no. of red blocks (cells with reward -1) still to be placed
# parameters of q learning
alpha = 0.5 # learning rate
gamma = 0.9 # discount factor
epsilon = 1.0 # probability that select_action picks a random action
epsilon_delta = 0.0001 # amount subtracted from epsilon while it is above 0.05
# Build the maze: the reward grid, the colour of every cell, the list of
# terminal cells, and the pygame window everything is drawn into.
current_pos = (0, 0)
reward = np.zeros((n, n))
terminals = []
background = (51, 51, 51)
# The window is 2n+1 cells wide: the maze occupies the left n columns and the
# Q-value view drawn by layout() occupies the n columns starting at column n+1.
screen = pygame.display.set_mode(((2 * n + 1) * 100, n * 100))
colors = [[background] * n for _ in range(n)]

start_cell = (0, 0)
goal_cell = (n - 1, n - 1)

# Drop the penalty cells at random; draws that land on the start cell, the
# goal cell, or a cell that already carries a reward are simply retried.
while penalities > 0:
    row = randint(0, n - 1)
    col = randint(0, n - 1)
    cell = (row, col)
    if reward[cell] != 0 or cell == start_cell or cell == goal_cell:
        continue
    reward[cell] = -1
    penalities -= 1
    colors[row][col] = (255, 0, 0)
    terminals.append(cell)

# The bottom-right cell is the goal: reward +1, drawn green, also terminal.
reward[goal_cell] = 1
colors[n - 1][n - 1] = (0, 255, 0)
terminals.append(goal_cell)
# init q learning: every cell gets its own list of four action values,
# indexed 0=up, 1=down, 2=left, 3=right, all starting at zero.
Q = {(row, col): [0] * 4 for row in range(n) for col in range(n)}
def select_action(pos):
    """Choose an action index (0-3) for cell ``pos`` epsilon-greedily.

    A uniform random draw is compared against the module-level ``epsilon``:
    when the draw is at most ``epsilon`` a random action is returned,
    otherwise the action with the largest value in ``Q[pos]`` is returned
    (``np.argmax`` picks the first one on ties).
    """
    explore = np.random.random() <= epsilon
    if explore:
        return randint(0, 3)
    return np.argmax(Q[tuple(pos)])
def step():
    """Advance the agent by one move and apply one Q-learning update.

    An action is chosen for ``current_pos`` with ``select_action`` and the
    agent moves one cell (0=up, 1=down, 2=left, 3=right); a move that would
    leave the grid keeps the agent where it is.  ``Q[current_pos][action]``
    is then moved a fraction ``alpha`` of the way toward the
    temporal-difference target:

    * non-terminal next cell: ``reward + gamma * max(Q[next_cell])``
    * terminal next cell:     ``reward`` only (no future value)

    Entering a terminal cell ends the episode, so the agent is put back on
    the start cell ``(0, 0)`` instead of staying on the terminal cell.

    Reads and writes the module-level ``current_pos``, ``epsilon`` and ``Q``.
    """
    global current_pos, epsilon

    # Choose the action for the cell the agent is standing on.
    action = select_action(current_pos)

    new_pos = list(current_pos)
    if action == 0 and new_pos[0] > 0:  # move up
        new_pos[0] -= 1
    elif action == 1 and new_pos[0] < n - 1:  # move down
        new_pos[0] += 1
    elif action == 2 and new_pos[1] > 0:  # move left
        new_pos[1] -= 1
    elif action == 3 and new_pos[1] < n - 1:  # move right
        new_pos[1] += 1
    new_pos = tuple(new_pos)

    # float() keeps the Q-table made of plain Python numbers rather than
    # numpy scalars.
    immediate = float(reward[new_pos])
    if new_pos not in terminals:
        # Bootstrap from the best value currently known for the next cell.
        target = immediate + gamma * max(Q[new_pos])
        next_pos = new_pos
    else:
        # A terminal cell has no successor, so the target is the reward alone,
        # and the next episode restarts from the start cell.
        target = immediate
        next_pos = (0, 0)
    Q[current_pos][action] += alpha * (target - Q[current_pos][action])

    # NOTE(review): the flattened source does not show whether this decay ran
    # on every step or only at the end of an episode; per-step is assumed.
    if epsilon > 0.05:
        epsilon -= epsilon_delta
    current_pos = next_pos
def layout():
    """Draw the maze, the per-cell best Q-values, and the agent onto ``screen``.

    The left n columns show the maze using ``colors``.  The n columns starting
    at column n+1 show, for each cell, the largest value in ``Q[cell]``:
    green scaled by the value when it is positive, red scaled by its
    magnitude otherwise.  The agent is a blue circle centred on
    ``current_pos`` in the maze view.
    """
    white = (255, 255, 255)
    for row in range(n):
        top = row * 100
        for col in range(n):
            maze_left = col * 100
            value_left = (col + n + 1) * 100

            # Maze cell: a white square with a smaller coloured square on
            # top, which leaves a 3-pixel white border.
            pygame.draw.rect(screen, white, (maze_left, top, 100, 100), 0)
            pygame.draw.rect(screen, colors[row][col], (maze_left + 3, top + 3, 94, 94), 0)

            # Value cell: same framing, shaded by the best action value.
            pygame.draw.rect(screen, white, (value_left, top, 100, 100), 0)
            best = max(Q[(row, col)])
            if best > 0:
                shade = (0, int(255 * best), 0)
            else:
                shade = (int(-255 * best), 0, 0)
            pygame.draw.rect(screen, shade, (value_left + 3, top + 3, 94, 94), 0)

    # current_pos is (row, col); pygame wants (x, y), hence the swap.
    agent_center = (current_pos[1] * 100 + 50, current_pos[0] * 100 + 50)
    pygame.draw.circle(screen, (25, 129, 230), agent_center, 30, 0)
# Main loop: one learning step per frame, then redraw and handle input.
# Closing the window ends the loop; SPACE toggles between the normal frame
# delay and no delay at all.
sleep_time = frame_interval
running = True
while running:
    step()
    sleep(sleep_time)
    screen.fill(background)
    layout()
    for event in pygame.event.get():
        if event.type == pygame.QUIT:
            running = False
        elif event.type == pygame.KEYDOWN and event.key == pygame.K_SPACE:
            if sleep_time > 0:
                sleep_time = 0
            else:
                sleep_time = frame_interval
    pygame.display.flip()
pygame.quit()