import numpy as np
import MDP
import RL


# Construct simple MDP as described in Lecture 2a Slides 13-14.
#
# T is a 2 x 4 x 4 array and R is a 2 x 4 array. The Q-table below is sized
# (mdp.nActions, mdp.nStates), so the leading axis is presumably the action
# and the remaining axes the state (and next state for T) -- confirm against
# the MDP.MDP constructor.
T = np.array(
    [
        [
            [0.5, 0.5, 0, 0],
            [0, 1, 0, 0],
            [0.5, 0.5, 0, 0],
            [0, 1, 0, 0],
        ],
        [
            [1, 0, 0, 0],
            [0.5, 0, 0, 0.5],
            [0.5, 0, 0.5, 0],
            [0, 0, 0.5, 0.5],
        ],
    ]
)
R = np.array(
    [
        [0, 0, 10, 10],
        [0, 0, 10, 10],
    ]
)
discount = 0.9
mdp = MDP.MDP(T, R, discount)

# np.random.normal is handed to RL.RL as its second argument; presumably it is
# used to sample noisy rewards -- verify against RL.RL.
rlProblem = RL.RL(mdp, np.random.normal)

# Test Q-learning: start from state 0 with an all-zero Q-table.
Q, policy, _ = rlProblem.qLearning(
    s0=0,
    initialQ=np.zeros((mdp.nActions, mdp.nStates)),
    nEpisodes=1000,
    nSteps=100,
    epsilon=0.3,
)
print("\nQ-learning results")
print(Q)
print(policy)