import java.io.FileNotFoundException;
import java.io.IOException;

//
//  ProbCollection.java
//  DecDynProg
//
//  Created by Daniel Bernstein on Sun Nov 09 2003.
//  expanded by Christopher Amato.
//

/**
 * Defines the transition, observation and reward matrices
 * for a two-agent problem selected by an integer id passed to the constructor.
 *
 * NOTE(review): this copy of the file is damaged inside the constructor (see the
 * marker there); part of the problem setup is missing and the class does not
 * compile as it stands.
 */
public class ProblemData3Probs {

    // trans is filled/checked as trans[s][a1][a2][s_] (see checkSumToOne).
    // rew is allocated in the constructor as [2][m_states][m_actions][m_actions]
    // and written as rew[agent][...][...][...].
    public double[][][][] trans, rew;
    //OLD====================== double[][][][][][] obs;
    // obs is summed in checkSumToOne as obs[o1][o2][s][a1][a2][s_].
    public double [][][][][][] obs;
    // Start-state distribution; allocated with numStates entries in the constructor.
    public double[] startDist;

    /**
     * Prints a diagnostic line for every row of the given tables whose
     * probabilities do not sum to approximately one (outside 0.99..1.01).
     *
     * For trans, the sum runs over s_ for each (s, a1, a2). For obs, the sum runs
     * over (o1, o2) for each (s, a1, a2, s_). Nothing is returned and execution is
     * not stopped on a failed check (the System.exit calls are commented out).
     *
     * Loop bounds for the inner dimensions are read from index 0 of the outer
     * dimensions, so the arrays are treated as rectangular and must be non-empty.
     *
     * @param trans transition table indexed [s][a1][a2][s_]
     * @param obs   observation table indexed [o1][o2][s][a1][a2][s_]
     */
    private static void checkSumToOne(double trans[][][][], double obs[][][][][][]) {
        // check that trans probs sum to one
        double count;
        for (int s = 0; s < trans.length; s++) {
            for (int a1 = 0; a1 < trans[0].length; a1++) {
                for (int a2 = 0; a2 < trans[0][0].length; a2++) {
                    count = 0;
                    for (int s_ = 0; s_ < trans[0][0][0].length; s_++) {
                        count += trans[s][a1][a2][s_];
                    }
                    // 1% tolerance on either side of 1.0
                    if (count > 1.01||count <0.99) {
                        System.out.println("DONT SUM TO ONE STATE! " + count + " " + s + " " + a1 + " " + a2);
                        // System.exit(1);
                    }
                }
            }
        }
        // check that obs probs sum to one
        // (the first two dimensions of obs are the observations, hence the bounds below)
        for (int s = 0; s < obs[0][0].length; s++) {
            for (int a1 = 0; a1 < obs[0][0][0].length; a1++) {
                for (int a2 = 0; a2 < obs[0][0][0][0].length; a2++) {
                    for (int s_ = 0; s_ < obs[0][0][0][0][0].length; s_++) {
                        count = 0;
                        // sum over every joint observation (o1, o2)
                        for (int o1 = 0; o1 < obs.length; o1++) {
                            for (int o2 = 0; o2 < obs[0].length; o2++) {
                                count += obs[o1][o2][s][a1][a2][s_];
                            }
                        }
                        if (count > 1.01 || count < 0.99) {
                            System.out.println("DONT SUM TO ONE OBS! " + count +" s=" + s + " a1=" + a1 + " a2=" + a2+ " s_="+s_ );
                            // System.exit(1);
                        }
                    }
                }
            }
        }
    }

    /**
     * Builds the problem tables for the problem selected by {@code problem}.
     *
     * Only {@code case 9} (the recycling-robot problem) has a visible case label
     * in this copy of the file. Values that match no visible case leave the
     * fields unassigned.
     *
     * @param problem integer id of the problem to construct
     */
    public ProblemData3Probs(int problem) {
        switch (problem) {
            case 9: // DEC-MDP recycling robot
                System.out.println("My DEC-MDP Recycling Robot Problem...");
                int numStates=2*2;
                int numObs=2;
                int numAct=3;
                int numStatesEach =2;
                /*The problem can be described as follows:
                 * Each robot chooses to search for the big item, search for the little item,
                 * wait or recharge.
                 * The state is the status of the battery power for each agent
                 * Searching for the big item increases the chance of depleting the battery, and if both
                 * robots get the big item then the reward is higher
                 * Searching for the little item depletes the battery a smaller amount and never fails
                 * Recharging recharges the battery, but does not provide a reward (do this instead of waiting)
                 * */
                // All probability mass starts on joint state 0.
                startDist = new double[numStates];
                startDist[0] = 1.0;
                startDist[1] = 0.0;
                startDist[2] = 0.0;
                startDist[3] = 0.0;
                //actions for each agent are: searchbig, searchlittle, wait and recharge
                // NOTE(review): four actions are named above but numAct is 3 -- confirm which
                // action index maps to which action.
                //states (the battery status) are high and low for each agent
                //rewards
                //trans[s][a1][a2][s_]
                //these are independent so trans[s1,s2][a1][a2][s1,s2]=trans[s1][a1][s1]*trans[s2][a2][s2]
                double[][][] transEach = new double[numStatesEach][numAct][numStatesEach];
                //the prob of staying on high charge after a search for the big item
                double alphaBig=0.5;
                //the prob of staying on high charge after a search for the little item
                double alphaSmall=0.7;
                //the prob of depleting the battery after a search for the big item
                double betaBig=0.3;
                //the prob of depleting the battery after a search for the small item
                double betaSmall=0.2;
                // NOTE(review): betaBig/betaSmall are assigned below to transEach[1][a][0]
                // (from state 1 to state 0), which does not obviously match the
                // "depleting" wording above -- confirm the intended meaning.
                //P(s'|a, s)
                //trans[s][a][s_]
                //searching for the big thing has a higher prob of depleting battery
                //retaining a high battery level after each action
                transEach[0][2][0]=alphaBig;
                transEach[0][1][0]=alphaSmall;
                transEach[0][0][0]=1.0;
                //transitioning from the high to the low battery level after each action
                transEach[0][2][1]=(1-alphaBig);
                transEach[0][1][1]=(1-alphaSmall);
                transEach[0][0][1]=0.0;
                //transitioning from low to high on searching means the battery ran out
                transEach[1][2][0]=betaBig;
                transEach[1][1][0]=betaSmall;
                transEach[1][0][0]=1.0;
                transEach[1][2][1]=(1-betaBig);
                transEach[1][1][1]=(1-betaSmall);
                transEach[1][0][1]=0.0;
                trans = new double[numStates][numAct][numAct][numStates];
                System.out.println("trans");
                // NOTE(review): SOURCE IS DAMAGED HERE. The loop header on the next line stops
                // after "s1" and is followed directly by what looks like the tail of a comment
                // ("0 probability"). Presumably everything between a '<' and a later '>' was
                // stripped from this copy. Missing from view: the rest of this loop, the
                // remainder of the case 9 setup, and the case label / declarations of the
                // section below (m_states, m_actions and the allocation of obs are used there
                // but never declared in the visible text). Restore from an intact copy; do not
                // reconstruct by guesswork.
                for(int s1=0;s1 0 probability
                // obs index order, per checkSumToOne: obs[o1][o2][s][a1][a2][s_]
                obs[0][0][0][2][2][1] = 0.25;
                obs[1][0][0][2][2][1] = 0.25;
                obs[0][1][0][2][2][1] = 0.25;
                obs[1][1][0][2][2][1] = 0.25;
                obs[0][0][1][2][2][0] = 0.25;
                obs[1][0][1][2][2][0] = 0.25;
                obs[0][1][1][2][2][0] = 0.25;
                obs[1][1][1][2][2][0] = 0.25;
                //Tiger stayed, nobody opened, heard different noises
                obs[0][0][0][2][2][0] = 0.7225;
                obs[1][0][0][2][2][0] = 0.1275;
                obs[0][1][0][2][2][0] = 0.1275;
                obs[1][1][0][2][2][0] = 0.0225;
                obs[0][0][1][2][2][1] = 0.0225;
                obs[1][0][1][2][2][1] = 0.1275;
                obs[0][1][1][2][2][1] = 0.1275;
                obs[1][1][1][2][2][1] = 0.7225;
                // reward function A
                // rew[STATE][ACTION1][ACTION2]
                // (the array carries one extra leading dimension of size 2, indexed by
                // "agent" in the loop below; both agents receive identical values)
                rew = new double[2][m_states][m_actions][m_actions];
                for(int agent=0;agent<2;agent++){
                    rew[agent][0][0][0] = -50;
                    rew[agent][0][0][1] = -100;
                    rew[agent][0][1][0] = -100;
                    rew[agent][0][1][1] = 20;
                    rew[agent][1][0][0] = 20;
                    rew[agent][1][0][1] = -100;
                    rew[agent][1][1][0] = -100;
                    rew[agent][1][1][1] = -50;
                    rew[agent][0][0][2] = -101;
                    rew[agent][0][2][0] = -101;
                    rew[agent][0][1][2] = 9;
                    rew[agent][0][2][1] = 9;
                    rew[agent][1][0][2] = 9;
                    rew[agent][1][2][0] = 9;
                    rew[agent][1][1][2]= -101;
                    rew[agent][1][2][1] = -101;
                    rew[agent][0][2][2] = -2;
                    rew[agent][1][2][2] = -2;
                }
                break;
        }
    }
}