# TestRL.py -- file contents as introduced by patch
# ee68ca8046082ad403c0107a6f1887f2f39e52bd ("ADD file via upload").
#
# Driver script: builds a small 2-action / 4-state MDP, runs Q-learning on it
# through the project's RL module, and prints the resulting Q-table and policy.

import numpy as np

import MDP
import RL


# Simple MDP as described in Lecture 2a, slides 13-14.
#
# T has shape (2, 4, 4); every innermost row sums to 1.
# NOTE(review): presumably indexed as T[action, state, next_state] -- confirm
# against MDP.MDP.
T = np.array(
    [
        [
            [0.5, 0.5, 0, 0],
            [0, 1, 0, 0],
            [0.5, 0.5, 0, 0],
            [0, 1, 0, 0],
        ],
        [
            [1, 0, 0, 0],
            [0.5, 0, 0, 0.5],
            [0.5, 0, 0.5, 0],
            [0, 0, 0.5, 0.5],
        ],
    ]
)

# R has shape (2, 4) and integer entries.
# NOTE(review): presumably indexed as R[action, state] -- confirm against MDP.MDP.
R = np.array(
    [
        [0, 0, 10, 10],
        [0, 0, 10, 10],
    ]
)

discount = 0.9
mdp = MDP.MDP(T, R, discount)

# np.random.normal is handed to RL.RL as its second argument.
# NOTE(review): presumably used to sample noisy rewards -- confirm against RL.RL.
rlProblem = RL.RL(mdp, np.random.normal)

# Test Q-learning: start in state 0 with an all-zero Q-table of shape
# (nActions, nStates). The third value returned by qLearning is not used here.
Q, policy, _ = rlProblem.qLearning(
    s0=0,
    initialQ=np.zeros((mdp.nActions, mdp.nStates)),
    nEpisodes=1000,
    nSteps=100,
    epsilon=0.3,
)

print("\nQ-learning results")
print(Q)
print(policy)