/** * This applet demonstrates a simple game. It isn't designed to be general or reusable.
* Copyright (C) 2006 David Poole.
* This program gives the core of the simulation. The GUI is in SGameGUI.java. The environment code is in SGameEnv.java. This controller is in TGameQController.java.
This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation; either version 2 of the License, or (at your option) any later version.
This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
You should have received a copy of the GNU General Public License along with this program; if not, write to the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. * @author David Poole poole@cs.ubc.ca * @version 0.41 2007-09-09 */ public class TGameQController extends TGameController { /** * Construct a new controller with the given environment */ TGameQController(TGameEnv environment) { super(environment); title = "Q-learning contoller"; } //String title = "Q-learning/SARSA contoller"; /** qvalues[xpos,ypos,action] */ double qvalues[][][] = new double[2][3][4]; int visits[][][] = new int[2][3][4]; /** The GUI uses qvalue(x,y,a) to display values and for the arrows. */ public double qvalue(int xval, int yval, int action) { return qvalues[xval][yval][action]; }; // surely this exists somewhere! int toInt(boolean Boo) { if (Boo) return 1; else return 0; } /** * resets the Q-values. * * @param initVal the initial value given by a box in the GUI */ public void doreset(double initVal) { for (int x=0; x<2; x++) for (int y=0; y<3; y++) for (int a=0; a<4; a++) { qvalues[x][y][a]=initVal; visits[x][y][a]=0; } } /** old X - needs to be remebered through steps for SARSA */ int oldX=0; /** old Y - needs to be remebered through steps for SARSA */ int oldY=2; /** prevAction - previous action */ int prevAction=0; /** prevReward - previous reward */ double prevReward=0.0; /** * does one step. * * carries out the action in the environment. This may be a place * to record what the agent has learned from its experience. *
The actions are