{ "cells": [ { "cell_type": "code", "execution_count": 1, "metadata": {}, "outputs": [], "source": [ "import aimBotEnv\n", "import PPO\n", "import numpy as np\n", "\n", "import tensorflow as tf\n", "import time\n", "import datetime\n", "\n", "from PPO import PPO\n", "from PPOBuffer import PPOBuffer\n", "from PPOConfig import PPOConfig\n", "from PPOHistoryRecorder import PPOHistory" ] }, { "cell_type": "code", "execution_count": 2, "metadata": {}, "outputs": [], "source": [ "# Attempts to allocate only the GPU memory needed for allocation\n", "physical_devices = tf.config.list_physical_devices(\"GPU\")\n", "tf.config.experimental.set_memory_growth(physical_devices[0], True)\n", "tf.random.set_seed(9331)" ] }, { "cell_type": "code", "execution_count": 3, "metadata": {}, "outputs": [], "source": [ "# Env\n", "ENV_PATH = \"./Build-CloseEnemyCut/Aimbot-PPO\"\n", "WORKER_ID = 1\n", "BASE_PORT = 200\n", "\n", "MAX_EP = 1000\n", "EP_LENGTH = 100000\n", "GAMMA = 0.99 # discount future reward (UP?)\n", "EPSILON = 0.2 # clip Ratio range[1-EPSILON,1+EPSILON]\n", "ACTOR_LR = 1e-5 # LR\n", "CRITIC_LR = 2e-5 # LR\n", "BATCH = 256 # learning step\n", "EPOCHS = 8\n", "ENTROPY_WHEIGHT = 0.001 # sigma's entropy in Actor loss\n", "ACTION_INTERVAL = 1 # take action every ACTION_INTERVAL steps\n", "\n", "\n", "TRAIN = True\n", "SAVE_DIR = \"PPO-Model/\" + datetime.datetime.now().strftime(\"%m%d%H%M\") + \"/\"\n", "LOAD_DIR = None\n", "\n", "CTN_ACTION_RANGE = 10\n", "\n", "ppoConfig = PPOConfig()" ] }, { "cell_type": "code", "execution_count": 4, "metadata": {}, "outputs": [], "source": [ "# initialize enviroment & buffer class\n", "env = aimBotEnv.makeEnv(envPath=ENV_PATH, workerID=WORKER_ID, basePort=BASE_PORT)\n", "ppoBuffer = PPOBuffer()\n", "ppoHistory = PPOHistory()\n", "\n", "STATE_SIZE = env.STATE_SIZE\n", "DISCRETE_SHAPE = env.DISCRETE_SHAPE\n", "DISCRETE_SIZE = env.DISCRETE_SIZE\n", "CONTINUOUS_SIZE = env.CONTINUOUS_SIZE\n", "ACTSPEC = env.ACTION_SPEC\n", "_, _, _, loadDir, _ = env.getSteps()\n", "\n", "agent = PPO(\n", " stateSize=STATE_SIZE,\n", " disActShape=DISCRETE_SHAPE,\n", " conActSize=CONTINUOUS_SIZE,\n", " conActRange=CTN_ACTION_RANGE,\n", " PPOConfig=ppoConfig,\n", ")\n", "\n", "# check load model or not\n", "if np.any(loadDir == 0):\n", " # create a new model\n", " print(\"No loadDir specified,Create a New Model\")\n", " LOAD_DIR = None\n", "else:\n", " # load model\n", " loadDirDateSTR = str(int(loadDir[0]))\n", " loadDirTimeSTR = str(int(loadDir[1]))\n", " if len(loadDirDateSTR) != 8:\n", " # fill lost 0 while converse float to string\n", " for _ in range(8 - len(loadDirDateSTR)):\n", " loadDirDateSTR = \"0\" + loadDirDateSTR\n", " if len(loadDirTimeSTR) != 6:\n", " # fill lost 0 while converse float to string\n", " for _ in range(6 - len(loadDirTimeSTR)):\n", " loadDirTimeSTR = \"0\" + loadDirTimeSTR\n", " LOAD_DIR = \"PPO-Model/\" + loadDirDateSTR + \"/\" + loadDirTimeSTR\n", " print(\"Load Model:\")\n", " print(LOAD_DIR)\n", "\n", "print(\"CONTINUOUS_SIZE\", CONTINUOUS_SIZE)\n", "print(\"DISCRETE_SIZE\", DISCRETE_SIZE)\n", "print(\"STATE_SIZE\", STATE_SIZE)\n", "\n", "disActShape = [3, 3, 2]\n" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "EP 0 START\n", "[0, 1, 0, array([-9.97837827])]\n", "[0, 1, 0, array([-9.98691182])]\n", "[0, 1, 0, array([-10.])]\n", "[0, 1, 0, array([-9.64988654])]\n", "[0, 1, 0, array([-7.57418975])]\n", "[0, 1, 0, array([-5.92892356])]\n", "[0, 1, 0, array([-6.17624156])]\n", "[0, 1, 0, array([-2.24651228])]\n", "[0, 1, 1, array([-4.46437881])]\n", "[0, 1, 0, array([-3.43449784])]\n", "[0, 1, 0, array([-0.60291197])]\n", "[0, 1, 0, array([0.08531085])]\n", "[0, 1, 0, array([-0.45389164])]\n", "[0, 1, 0, array([0.37792929])]\n", "[0, 1, 1, array([-1.33566601])]\n", "[0, 1, 0, array([-0.47963037])]\n", "[0, 1, 0, array([-0.54579713])]\n", "[0, 1, 0, array([-0.11555271])]\n", "[0, 1, 0, array([-0.7433499])]\n", "[0, 1, 0, array([0.03593643])]\n", "[0, 1, 0, array([-0.3937157])]\n", "[0, 1, 0, array([0.11989044])]\n", "[0, 1, 1, array([-0.81236673])]\n", "[0, 1, 0, array([1.09318675])]\n", "[0, 1, 0, array([-1.05842291])]\n", "[0, 1, 0, array([-0.13716247])]\n", "[0, 1, 0, array([0.63062648])]\n", "[0, 1, 0, array([-1.69533187])]\n", "[0, 0, 0, array([-0.38244634])]\n", "[0, 1, 0, array([-1.38137671])]\n", "[0, 1, 0, array([0.53938322])]\n", "[0, 1, 0, array([0.51478548])]\n", "[0, 1, 1, array([-0.11100765])]\n", "[0, 1, 0, array([-0.42890439])]\n", "[0, 1, 0, array([-0.36385959])]\n", "[0, 0, 0, array([-0.55843589])]\n", "[0, 1, 0, array([-0.02410512])]\n", "[0, 0, 1, array([-1.19189906])]\n", "[0, 1, 0, array([0.21983017])]\n", "[0, 1, 0, array([0.37856599])]\n", "[0, 1, 1, array([-0.48447338])]\n", "[0, 1, 0, array([0.48356156])]\n", "[0, 1, 0, array([-0.22375674])]\n", "[0, 1, 0, array([-0.52145602])]\n", "[0, 1, 0, array([0.51174054])]\n", "[0, 0, 0, array([-0.05832106])]\n", "[0, 1, 1, array([-0.88627272])]\n", "[0, 1, 0, array([-0.03064693])]\n", "[0, 1, 0, array([-0.6602404])]\n", "[0, 1, 1, array([-0.35516111])]\n", "[0, 1, 1, array([0.06982621])]\n", "[0, 1, 0, array([-0.4669471])]\n", "[0, 1, 0, array([-0.4053739])]\n", "[0, 0, 0, array([-0.03826078])]\n", "[0, 1, 0, array([-1.30099429])]\n", "[0, 1, 0, array([0.36407443])]\n", "[0, 1, 0, array([0.18388823])]\n", "[0, 1, 1, array([-0.59479871])]\n", "[0, 0, 0, array([-0.26183106])]\n", "[0, 2, 0, array([0.14005311])]\n", "[0, 1, 1, array([-0.50448942])]\n", "[0, 1, 1, array([-0.30438048])]\n", "[0, 1, 0, array([0.32387057])]\n", "[0, 0, 0, array([0.44578215])]\n", "[0, 0, 0, array([-0.68288912])]\n", "[0, 1, 1, array([-0.22745354])]\n", "[0, 1, 1, array([-1.44315195])]\n", "[0, 1, 1, array([-0.07839317])]\n", "[0, 1, 0, array([0.34598184])]\n", "[0, 1, 0, array([-0.54524618])]\n", "[0, 0, 1, array([0.5687016])]\n", "[0, 1, 1, array([-0.61567996])]\n", "[0, 1, 0, array([0.00746339])]\n", "[0, 1, 1, array([-1.07566994])]\n", "[0, 1, 1, array([-0.6757027])]\n", "[0, 1, 0, array([-0.12196091])]\n", "[0, 1, 0, array([0.64683347])]\n", "[0, 0, 0, array([-0.6340298])]\n", "[0, 1, 0, array([-0.01930979])]\n", "[0, 1, 1, array([0.04658404])]\n", "[0, 1, 0, array([-0.07395769])]\n", "[0, 1, 0, array([-1.19693177])]\n", "[0, 1, 0, array([-0.08378307])]\n", "[0, 1, 1, array([-0.3624297])]\n", "[0, 1, 1, array([0.20057263])]\n", "[0, 0, 0, array([-0.55188123])]\n", "[0, 1, 0, array([0.19255686])]\n", "[0, 0, 0, array([-0.35503351])]\n", "[0, 0, 0, array([-0.50078986])]\n", "[0, 1, 0, array([-0.50510786])]\n", "[0, 1, 0, array([-0.57153761])]\n", "[1, 1, 0, array([-0.16621574])]\n", "[0, 0, 1, array([-0.5885332])]\n", "[0, 1, 0, array([-0.11474287])]\n", "[0, 1, 0, array([0.12570003])]\n", "[0, 2, 0, array([0.09073465])]\n", "[0, 0, 0, array([-0.43608345])]\n", "[0, 1, 1, array([-0.43533329])]\n", "[0, 1, 0, array([-0.69747494])]\n", "[0, 0, 0, array([-0.26736301])]\n", "[0, 1, 1, array([-0.00405504])]\n", "[0, 1, 0, array([-0.28677529])]\n", "[0, 0, 0, array([-0.82367381])]\n", "[0, 1, 1, array([-0.53118829])]\n", "[0, 1, 0, array([-0.17245219])]\n", "[0, 1, 1, array([-0.81512184])]\n", "[0, 1, 0, array([-0.78884175])]\n", "[0, 1, 1, array([0.14143481])]\n", "[0, 0, 0, array([-0.2089322])]\n", "[0, 1, 1, array([-0.68556953])]\n", "[0, 1, 0, array([0.0326979])]\n", "[0, 1, 0, array([-0.68894363])]\n", "[0, 1, 1, array([-0.31106685])]\n", "[0, 1, 1, array([-0.31457812])]\n", "[0, 1, 0, array([-0.81168416])]\n", "[0, 1, 1, array([-0.02005783])]\n", "[0, 1, 1, array([-0.48878047])]\n", "[0, 2, 0, array([-0.05926955])]\n", "[0, 2, 1, array([-0.4681926])]\n", "[0, 1, 0, array([-0.40907601])]\n", "[0, 0, 1, array([-0.41412167])]\n", "[0, 0, 1, array([-0.37255823])]\n", "[0, 1, 1, array([-0.65528092])]\n", "[0, 1, 1, array([-0.43253259])]\n", "[0, 1, 0, array([-0.1397705])]\n", "[0, 0, 1, array([-0.56618627])]\n", "[0, 1, 1, array([-0.39328688])]\n", "[0, 1, 0, array([-0.66531288])]\n", "[0, 1, 0, array([-0.72612087])]\n", "[0, 1, 0, array([-0.61707316])]\n", "[0, 1, 0, array([-1.00322818])]\n", "[0, 0, 1, array([-0.87729813])]\n", "[0, 0, 1, array([-1.02736299])]\n", "[0, 1, 0, array([-1.19317126])]\n", "[0, 2, 0, array([-1.32728188])]\n", "[0, 1, 1, array([-1.71560216])]\n", "[0, 1, 1, array([-1.14711596])]\n", "[0, 0, 1, array([-1.57469699])]\n", "[0, 0, 1, array([-1.83247127])]\n", "[0, 1, 1, array([-2.91937])]\n", "[0, 2, 0, array([-3.57782645])]\n", "[0, 1, 0, array([-4.85294986])]\n", "[0, 1, 0, array([-5.52567955])]\n", "[0, 2, 0, array([-6.05648598])]\n", "[0, 0, 0, array([-4.3611569])]\n", "[0, 1, 1, array([-2.55303679])]\n", "[0, 0, 1, array([-2.17765898])]\n", "[0, 1, 0, array([-2.1107971])]\n", "[0, 1, 0, array([-2.16164318])]\n", "[0, 0, 1, array([-1.94931088])]\n", "[0, 1, 0, array([-2.76541562])]\n", "[0, 1, 0, array([-2.59743019])]\n", "[0, 0, 0, array([-2.70841501])]\n", "[0, 1, 1, array([-2.17638566])]\n", "[0, 0, 0, array([-1.45751219])]\n", "[0, 1, 1, array([-2.13177654])]\n", "[0, 2, 1, array([-1.41020595])]\n", "[0, 1, 0, array([-1.22194984])]\n", "[0, 0, 1, array([-1.16989697])]\n", "[0, 1, 1, array([-1.70600954])]\n", "[0, 0, 0, array([-0.86162724])]\n", "[0, 1, 1, array([-0.90767441])]\n", "[0, 1, 0, array([-2.08555947])]\n", "[0, 1, 0, array([-1.49361359])]\n", "[0, 1, 0, array([-1.44951952])]\n", "[0, 1, 1, array([-1.65972189])]\n", "[0, 1, 1, array([-1.182904])]\n", "[0, 1, 1, array([-1.62207036])]\n", "[0, 2, 0, array([-1.13806044])]\n", "[0, 1, 0, array([-1.05171702])]\n", "[0, 1, 0, array([-0.70734423])]\n", "[0, 0, 0, array([-1.1985315])]\n", "[0, 1, 1, array([-0.96393836])]\n", "[0, 1, 1, array([-0.11242301])]\n", "[0, 1, 0, array([-1.31423582])]\n", "[0, 0, 1, array([-0.53156151])]\n", "[0, 0, 0, array([-0.80210631])]\n", "[0, 0, 1, array([-0.90155855])]\n", "[0, 0, 1, array([-0.69736878])]\n", "[0, 1, 0, array([-1.24850775])]\n", "[0, 0, 1, array([-1.15889518])]\n", "[0, 1, 0, array([-1.69614764])]\n", "[0, 1, 1, array([-1.94686367])]\n", "[0, 1, 1, array([-2.58663485])]\n", "[0, 1, 0, array([-2.44671011])]\n", "[0, 0, 1, array([-2.33372746])]\n", "[0, 1, 1, array([-2.20399477])]\n", "[0, 0, 0, array([-2.74357332])]\n", "[0, 0, 1, array([-3.17867197])]\n", "[0, 0, 1, array([-3.34102608])]\n", "[0, 0, 1, array([-3.83051407])]\n", "[0, 1, 0, array([-5.00900225])]\n", "[0, 0, 1, array([-6.21143977])]\n", "[0, 1, 0, array([-6.85277795])]\n", "[0, 1, 0, array([-6.40021225])]\n", "[0, 1, 0, array([-4.49741925])]\n", "[0, 1, 1, array([-4.8828199])]\n", "[0, 1, 1, array([-4.95240928])]\n", "[0, 1, 1, array([-4.91505967])]\n", "[0, 1, 0, array([-4.10242358])]\n", "[0, 1, 0, array([-3.97951513])]\n", "[0, 1, 1, array([-4.39393992])]\n", "[0, 1, 1, array([-4.01163897])]\n", "[0, 0, 1, array([-4.01820346])]\n", "[0, 1, 1, array([-3.95445183])]\n", "[0, 1, 0, array([-4.52668016])]\n", "[0, 1, 1, array([-5.23900883])]\n", "[0, 0, 0, array([-4.94940753])]\n", "[0, 0, 1, array([-7.89200282])]\n", "[0, 1, 1, array([-7.74241437])]\n", "[0, 0, 1, array([-8.36491429])]\n", "[0, 1, 1, array([-9.62618536])]\n", "[0, 1, 0, array([-9.95622561])]\n", "[0, 1, 1, array([-9.8951381])]\n", "[0, 1, 0, array([-9.87486866])]\n", "[0, 1, 0, array([-9.4446276])]\n", "[0, 1, 0, array([-10.])]\n", "[0, 1, 0, array([-9.21388768])]\n", "[0, 1, 0, array([-9.36736109])]\n", "[0, 1, 0, array([-7.69439292])]\n", "[0, 1, 0, array([-6.82126605])]\n", "[0, 1, 0, array([-5.63043734])]\n", "[0, 1, 1, array([-1.35280247])]\n", "[0, 1, 0, array([-1.58250283])]\n", "[0, 1, 1, array([-1.68643652])]\n", "[0, 1, 1, array([-1.41479634])]\n", "[0, 1, 0, array([-0.82521777])]\n", "[0, 1, 1, array([-1.30009663])]\n", "[1, 1, 1, array([-1.02568303])]\n", "[0, 1, 1, array([-1.91950471])]\n", "[0, 1, 1, array([-0.1904922])]\n", "[0, 1, 0, array([-1.18773879])]\n", "[0, 1, 1, array([-0.42974429])]\n", "[0, 1, 1, array([-0.22308512])]\n", "[0, 1, 1, array([-0.54979847])]\n", "[0, 1, 0, array([-0.72797743])]\n", "[0, 1, 0, array([-0.22877314])]\n", "[0, 0, 0, array([0.31213725])]\n", "[0, 1, 0, array([-0.28328506])]\n", "[0, 1, 1, array([-1.03001752])]\n", "[2, 1, 1, array([-0.29810564])]\n", "[0, 0, 1, array([-0.84220163])]\n", "[0, 0, 0, array([-0.71541074])]\n", "[0, 1, 1, array([-0.32181016])]\n", "[0, 1, 0, array([-0.49104068])]\n", "[0, 1, 0, array([-0.09825997])]\n", "[0, 0, 0, array([0.13588543])]\n", "[0, 1, 1, array([0.11856532])]\n", "[0, 1, 1, array([-0.73461751])]\n", "[0, 1, 1, array([-0.38906399])]\n", "[0, 1, 0, array([-0.55525847])]\n", "[0, 0, 0, array([-0.34495453])]\n", "[1, 1, 1, array([-0.72165394])]\n", "[0, 1, 0, array([-0.38278675])]\n", "[0, 1, 0, array([-0.36388876])]\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "c:\\Users\\UCUNI\\OneDrive\\Unity\\ML-Agents\\Aimbot-PPO\\Aimbot-PPO-Python\\PPOBuffer.py:27: VisibleDeprecationWarning: Creating an ndarray from ragged nested sequences (which is a list-or-tuple of lists-or-tuples-or ndarrays with different lengths or shapes) is deprecated. If you meant to do this, you must specify 'dtype=object' when creating the ndarray.\n", " return self.standDims(np.asarray(self.actions))\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ "[0, 1, 0, array([10.])]\n", "[0, 1, 0, array([10.])]\n", "[0, 1, 0, array([10.])]\n", "[0, 1, 0, array([10.])]\n", "[0, 1, 0, array([10.])]\n", "[0, 1, 0, array([10.])]\n", "[0, 1, 0, array([10.])]\n", "[0, 1, 0, array([10.])]\n", "[0, 1, 0, array([10.])]\n", "[0, 1, 0, array([10.])]\n", "[0, 1, 0, array([10.])]\n", "[0, 1, 0, array([10.])]\n", "[0, 1, 0, array([10.])]\n", "[0, 1, 0, array([10.])]\n", "[0, 1, 0, array([10.])]\n", "[0, 1, 0, array([10.])]\n", "[0, 1, 0, array([10.])]\n", "[0, 1, 0, array([10.])]\n", "[0, 1, 0, array([10.])]\n", "[0, 1, 0, array([10.])]\n", "[0, 1, 0, array([10.])]\n", "[0, 1, 0, array([10.])]\n", "[0, 1, 0, array([10.])]\n", "[0, 1, 0, array([10.])]\n", "[0, 1, 0, array([10.])]\n", "[0, 1, 0, array([10.])]\n", "[0, 1, 0, array([10.])]\n", "[0, 1, 0, array([10.])]\n", "[0, 1, 0, array([10.])]\n", "[0, 1, 0, array([10.])]\n", "[0, 1, 0, array([10.])]\n", "[0, 1, 0, array([10.])]\n", "[0, 1, 0, array([10.])]\n", "[0, 1, 0, array([10.])]\n", "[0, 1, 0, array([10.])]\n", "[0, 1, 0, array([10.])]\n", "[0, 1, 0, array([10.])]\n", "[0, 1, 0, array([10.])]\n", "[0, 1, 0, array([10.])]\n", "[0, 1, 0, array([10.])]\n", "[0, 1, 0, array([10.])]\n", "[0, 1, 0, array([10.])]\n", "[0, 1, 0, array([10.])]\n", "[0, 1, 0, array([10.])]\n", "[0, 1, 0, array([10.])]\n", "[0, 1, 0, array([10.])]\n", "[0, 1, 0, array([10.])]\n", "[0, 1, 0, array([10.])]\n", "[0, 1, 0, array([10.])]\n", "[0, 1, 0, array([10.])]\n", "[0, 1, 0, array([10.])]\n", "[0, 1, 0, array([10.])]\n", "[0, 1, 0, array([10.])]\n", "[0, 1, 0, array([10.])]\n", "[0, 1, 0, array([10.])]\n", "[0, 1, 0, array([10.])]\n", "[0, 1, 0, array([10.])]\n", "[0, 1, 0, array([10.])]\n", "[0, 1, 0, array([10.])]\n", "[0, 1, 0, array([10.])]\n", "[0, 1, 0, array([10.])]\n", "[0, 1, 0, array([10.])]\n", "[0, 1, 0, array([10.])]\n", "[0, 1, 0, array([10.])]\n", "[0, 1, 0, array([10.])]\n", "[0, 1, 0, array([10.])]\n", "[0, 1, 0, array([10.])]\n", "[0, 1, 0, array([10.])]\n", "[0, 1, 0, array([10.])]\n", "[0, 1, 0, array([10.])]\n", "[0, 1, 0, array([10.])]\n", "[0, 1, 0, array([10.])]\n", "[0, 1, 0, array([10.])]\n", "[0, 1, 0, array([10.])]\n", "[0, 1, 0, array([10.])]\n", "[0, 1, 0, array([10.])]\n", "[0, 1, 0, array([10.])]\n", "[0, 1, 0, array([10.])]\n", "[0, 1, 0, array([10.])]\n", "[0, 1, 0, array([10.])]\n", "[0, 1, 0, array([10.])]\n", "[0, 1, 0, array([10.])]\n", "[0, 1, 0, array([10.])]\n", "[0, 1, 0, array([10.])]\n", "[0, 1, 0, array([10.])]\n", "[0, 1, 0, array([10.])]\n", "[0, 1, 0, array([10.])]\n", "[0, 1, 0, array([10.])]\n", "[0, 1, 0, array([10.])]\n", "[0, 1, 0, array([10.])]\n", "[0, 1, 0, array([10.])]\n", "[0, 1, 0, array([10.])]\n", "[0, 1, 0, array([10.])]\n", "[0, 1, 0, array([10.])]\n", "[0, 1, 0, array([10.])]\n", "[0, 1, 0, array([10.])]\n", "[0, 1, 0, array([10.])]\n", "[0, 1, 0, array([10.])]\n", "[0, 1, 0, array([10.])]\n", "[0, 1, 0, array([10.])]\n", "[0, 1, 0, array([10.])]\n", "[0, 1, 0, array([10.])]\n", "[0, 1, 0, array([10.])]\n", "[0, 1, 0, array([10.])]\n", "[0, 1, 0, array([10.])]\n", "[0, 1, 0, array([10.])]\n", "[0, 1, 0, array([10.])]\n", "[0, 1, 0, array([10.])]\n", "[0, 1, 0, array([10.])]\n", "[0, 1, 0, array([10.])]\n", "[0, 1, 0, array([10.])]\n", "[0, 1, 0, array([10.])]\n", "[0, 1, 0, array([10.])]\n", "[0, 1, 0, array([10.])]\n", "[0, 1, 0, array([10.])]\n", "[0, 1, 0, array([10.])]\n", "[0, 1, 0, array([10.])]\n", "[0, 1, 0, array([10.])]\n", "[0, 1, 0, array([10.])]\n", "[0, 1, 0, array([10.])]\n", "[0, 1, 0, array([10.])]\n", "[0, 1, 0, array([10.])]\n", "[0, 1, 0, array([10.])]\n", "[0, 1, 0, array([10.])]\n", "[0, 1, 0, array([10.])]\n", "[0, 1, 0, array([10.])]\n", "[0, 1, 0, array([10.])]\n", "[0, 1, 0, array([10.])]\n", "[0, 1, 0, array([10.])]\n", "[0, 1, 0, array([10.])]\n", "[0, 1, 0, array([10.])]\n", "[0, 1, 0, array([10.])]\n", "[0, 1, 0, array([10.])]\n", "[0, 1, 0, array([10.])]\n", "[0, 1, 0, array([10.])]\n", "[0, 1, 0, array([10.])]\n", "[0, 1, 0, array([10.])]\n", "[0, 1, 0, array([10.])]\n", "[0, 1, 0, array([10.])]\n", "[0, 1, 0, array([10.])]\n", "[0, 1, 0, array([10.])]\n", "[0, 1, 0, array([10.])]\n", "[0, 1, 0, array([10.])]\n", "[0, 1, 0, array([10.])]\n", "[0, 1, 0, array([10.])]\n", "[0, 1, 0, array([10.])]\n", "[0, 1, 0, array([10.])]\n", "[0, 1, 0, array([10.])]\n", "[0, 1, 0, array([10.])]\n", "[0, 1, 0, array([10.])]\n", "[0, 1, 0, array([10.])]\n", "[0, 1, 0, array([10.])]\n", "[0, 1, 0, array([10.])]\n", "[0, 1, 0, array([10.])]\n", "[0, 1, 0, array([10.])]\n", "[0, 1, 0, array([10.])]\n", "[0, 1, 0, array([10.])]\n", "[0, 1, 0, array([10.])]\n", "[0, 1, 0, array([10.])]\n", "[0, 1, 0, array([10.])]\n", "[0, 1, 0, array([10.])]\n", "[0, 1, 0, array([10.])]\n", "[0, 1, 0, array([10.])]\n", "[0, 1, 0, array([10.])]\n", "[0, 1, 0, array([10.])]\n", "[0, 1, 0, array([10.])]\n", "[0, 1, 0, array([10.])]\n", "[0, 1, 0, array([10.])]\n", "[0, 1, 0, array([10.])]\n", "[0, 1, 0, array([10.])]\n", "[0, 1, 0, array([10.])]\n", "[0, 1, 0, array([10.])]\n", "[0, 1, 0, array([10.])]\n", "[0, 1, 0, array([10.])]\n", "[0, 1, 0, array([10.])]\n", "[0, 1, 0, array([10.])]\n", "[0, 1, 0, array([10.])]\n", "[0, 1, 0, array([10.])]\n", "[0, 1, 0, array([10.])]\n", "[0, 1, 0, array([10.])]\n", "[0, 1, 0, array([10.])]\n", "[0, 1, 0, array([10.])]\n", "[0, 1, 0, array([10.])]\n", "[0, 1, 0, array([10.])]\n", "[0, 1, 0, array([10.])]\n", "[0, 1, 0, array([10.])]\n", "[0, 1, 0, array([10.])]\n", "[0, 1, 0, array([10.])]\n", "[0, 1, 0, array([10.])]\n", "[0, 1, 0, array([10.])]\n", "[0, 1, 0, array([10.])]\n", "[0, 1, 0, array([10.])]\n", "[0, 1, 0, array([10.])]\n", "[0, 1, 0, array([10.])]\n", "[0, 1, 0, array([10.])]\n", "[0, 1, 0, array([10.])]\n", "[0, 1, 0, array([10.])]\n", "[0, 1, 0, array([10.])]\n", "[0, 1, 0, array([10.])]\n", "[0, 1, 0, array([10.])]\n", "[0, 1, 0, array([10.])]\n", "[0, 1, 0, array([10.])]\n", "[0, 1, 0, array([10.])]\n", "[0, 1, 0, array([10.])]\n", "[0, 1, 0, array([10.])]\n", "[0, 1, 0, array([10.])]\n", "[0, 1, 0, array([10.])]\n", "[0, 1, 0, array([10.])]\n", "[0, 1, 0, array([10.])]\n", "[0, 1, 0, array([10.])]\n", "[0, 1, 0, array([10.])]\n", "[0, 1, 0, array([10.])]\n", "[0, 1, 0, array([10.])]\n", "[0, 1, 0, array([10.])]\n", "[0, 1, 0, array([10.])]\n", "[0, 1, 0, array([10.])]\n", "[0, 1, 0, array([10.])]\n", "[0, 1, 0, array([10.])]\n", "[0, 1, 0, array([10.])]\n", "[0, 1, 0, array([10.])]\n", "[0, 1, 0, array([10.])]\n", "[0, 1, 0, array([10.])]\n", "[0, 1, 0, array([10.])]\n", "[0, 1, 0, array([10.])]\n", "[0, 1, 0, array([10.])]\n", "[0, 1, 0, array([10.])]\n", "[0, 1, 0, array([10.])]\n", "[0, 1, 0, array([10.])]\n", "[0, 1, 0, array([10.])]\n", "[0, 1, 0, array([10.])]\n", "[0, 1, 0, array([10.])]\n", "[0, 1, 0, array([10.])]\n", "[0, 1, 0, array([10.])]\n", "[0, 1, 0, array([10.])]\n", "[0, 1, 0, array([10.])]\n", "[0, 1, 0, array([10.])]\n", "[0, 1, 0, array([10.])]\n", "[0, 1, 0, array([10.])]\n", "[0, 1, 0, array([10.])]\n", "[0, 1, 0, array([10.])]\n", "[0, 1, 0, array([10.])]\n", "[0, 1, 0, array([10.])]\n", "[0, 1, 0, array([10.])]\n", "[0, 1, 0, array([10.])]\n", "[0, 1, 0, array([10.])]\n", "[0, 1, 0, array([10.])]\n", "[0, 1, 0, array([10.])]\n", "[0, 1, 0, array([10.])]\n", "[0, 1, 0, array([10.])]\n", "[0, 1, 0, array([10.])]\n", "[0, 1, 0, array([10.])]\n", "[0, 1, 0, array([10.])]\n", "[0, 1, 0, array([10.])]\n", "[0, 1, 0, array([10.])]\n", "[0, 1, 0, array([10.])]\n", "[0, 1, 0, array([10.])]\n", "[0, 1, 0, array([10.])]\n", "[0, 1, 0, array([10.])]\n", "[0, 1, 0, array([10.])]\n", "[0, 1, 0, array([10.])]\n", "[0, 1, 0, array([10.])]\n", "[0, 1, 0, array([10.])]\n", "[0, 1, 0, array([10.])]\n", "[0, 1, 0, array([10.])]\n", "[0, 1, 0, array([10.])]\n", "[0, 1, 0, array([10.])]\n", "[0, 1, 0, array([10.])]\n", "[0, 1, 0, array([10.])]\n", "[0, 1, 0, array([10.])]\n", "[0, 1, 0, array([10.])]\n", "[0, 1, 0, array([10.])]\n", "[0, 1, 0, array([10.])]\n", "[0, 1, 0, array([10.])]\n", "[0, 1, 0, array([10.])]\n", "[0, 1, 0, array([10.])]\n", "[0, 1, 0, array([10.])]\n", "[0, 1, 0, array([10.])]\n", "[0, 1, 0, array([10.])]\n", "[0, 1, 0, array([10.])]\n", "[0, 1, 0, array([10.])]\n", "[0, 1, 0, array([10.])]\n", "[0, 1, 0, array([10.])]\n", "[0, 1, 0, array([10.])]\n", "[0, 1, 0, array([10.])]\n", "[0, 1, 0, array([10.])]\n", "[0, 1, 0, array([10.])]\n", "[0, 1, 0, array([10.])]\n", "[0, 1, 0, array([10.])]\n", "[0, 1, 0, array([10.])]\n", "[0, 1, 0, array([10.])]\n", "[0, 1, 0, array([10.])]\n", "[0, 1, 0, array([10.])]\n", "[0, 1, 0, array([10.])]\n", "[0, 1, 0, array([10.])]\n", "[0, 1, 0, array([10.])]\n", "[0, 1, 0, array([10.])]\n", "[0, 1, 0, array([10.])]\n", "[0, 1, 0, array([10.])]\n", "[0, 1, 0, array([10.])]\n", "[0, 1, 0, array([10.])]\n", "[0, 1, 0, array([10.])]\n", "[0, 1, 0, array([10.])]\n", "[0, 1, 0, array([10.])]\n", "[0, 1, 0, array([10.])]\n", "[0, 1, 0, array([10.])]\n", "[0, 1, 0, array([10.])]\n", "[0, 1, 0, array([10.])]\n", "[0, 1, 0, array([10.])]\n", "[0, 1, 0, array([10.])]\n", "[0, 1, 0, array([10.])]\n", "[0, 1, 0, array([10.])]\n", "[0, 1, 0, array([10.])]\n", "[0, 1, 0, array([10.])]\n", "[0, 1, 0, array([10.])]\n", "[0, 1, 0, array([10.])]\n", "[0, 1, 0, array([10.])]\n", "[0, 1, 0, array([10.])]\n", "[0, 1, 0, array([10.])]\n", "[0, 1, 0, array([10.])]\n", "[0, 1, 0, array([10.])]\n", "[0, 1, 0, array([10.])]\n", "[0, 1, 0, array([10.])]\n", "[0, 1, 0, array([10.])]\n", "[0, 1, 0, array([10.])]\n", "[0, 1, 0, array([10.])]\n", "[0, 1, 0, array([10.])]\n", "[0, 1, 0, array([10.])]\n", "[0, 1, 0, array([10.])]\n", "[0, 1, 0, array([10.])]\n", "[0, 1, 0, array([10.])]\n", "[0, 1, 0, array([10.])]\n", "[0, 1, 0, array([10.])]\n", "[0, 1, 0, array([10.])]\n", "[0, 1, 0, array([10.])]\n", "[0, 1, 0, array([10.])]\n", "[0, 1, 0, array([10.])]\n", "[0, 1, 0, array([10.])]\n", "[0, 1, 0, array([10.])]\n", "[0, 1, 0, array([10.])]\n", "[0, 1, 0, array([10.])]\n", "[0, 1, 0, array([10.])]\n", "[0, 1, 0, array([10.])]\n", "[0, 1, 0, array([10.])]\n", "[0, 1, 0, array([10.])]\n", "[0, 1, 0, array([10.])]\n", "[0, 1, 0, array([10.])]\n", "[0, 1, 0, array([10.])]\n", "[0, 1, 0, array([10.])]\n", "[0, 1, 0, array([10.])]\n", "[0, 1, 0, array([10.])]\n", "[0, 1, 0, array([10.])]\n", "[0, 1, 0, array([10.])]\n", "[0, 1, 0, array([10.])]\n", "[0, 1, 0, array([10.])]\n", "[0, 1, 0, array([10.])]\n", "[0, 1, 0, array([10.])]\n", "[0, 1, 0, array([10.])]\n", "[0, 1, 0, array([10.])]\n", "[0, 1, 0, array([10.])]\n", "[0, 1, 0, array([10.])]\n", "[0, 1, 0, array([10.])]\n", "[0, 1, 0, array([10.])]\n", "[0, 1, 0, array([10.])]\n", "[0, 1, 0, array([10.])]\n", "[0, 1, 0, array([10.])]\n", "[0, 1, 0, array([10.])]\n", "[0, 1, 0, array([10.])]\n", "[0, 1, 0, array([10.])]\n", "[0, 1, 0, array([10.])]\n", "[0, 1, 0, array([10.])]\n", "[0, 1, 0, array([10.])]\n", "[0, 1, 0, array([10.])]\n", "[0, 1, 0, array([10.])]\n", "[0, 1, 0, array([10.])]\n", "[0, 1, 0, array([10.])]\n", "[0, 1, 0, array([10.])]\n", "[0, 1, 0, array([10.])]\n", "[0, 1, 0, array([10.])]\n", "[0, 1, 0, array([10.])]\n", "[0, 1, 0, array([10.])]\n", "[0, 1, 0, array([10.])]\n", "[0, 1, 0, array([10.])]\n", "[0, 1, 0, array([10.])]\n", "[0, 1, 0, array([10.])]\n", "[0, 1, 0, array([10.])]\n", "[0, 1, 0, array([10.])]\n", "[0, 1, 0, array([10.])]\n", "[0, 1, 0, array([10.])]\n", "[0, 1, 0, array([10.])]\n", "[0, 1, 0, array([10.])]\n", "[0, 1, 0, array([10.])]\n", "[0, 1, 0, array([10.])]\n", "[0, 1, 0, array([10.])]\n", "[0, 1, 0, array([10.])]\n", "[0, 1, 0, array([10.])]\n", "[0, 1, 0, array([10.])]\n", "[0, 1, 0, array([10.])]\n", "[0, 1, 0, array([10.])]\n", "[0, 1, 0, array([10.])]\n", "[0, 1, 0, array([10.])]\n", "[0, 1, 0, array([10.])]\n", "[0, 1, 0, array([10.])]\n", "[0, 1, 0, array([10.])]\n", "[0, 1, 0, array([10.])]\n", "[0, 1, 0, array([10.])]\n", "[0, 1, 0, array([10.])]\n", "[0, 1, 0, array([10.])]\n", "[0, 1, 0, array([10.])]\n", "[0, 1, 0, array([10.])]\n", "[0, 1, 0, array([10.])]\n", "[0, 1, 0, array([10.])]\n", "[0, 1, 0, array([10.])]\n", "[0, 1, 0, array([10.])]\n", "[0, 1, 0, array([10.])]\n", "[0, 1, 0, array([10.])]\n", "[0, 1, 0, array([10.])]\n", "[0, 1, 0, array([10.])]\n", "[0, 1, 0, array([10.])]\n", "[0, 1, 0, array([10.])]\n", "[0, 1, 0, array([10.])]\n", "[0, 1, 0, array([10.])]\n", "[0, 1, 0, array([10.])]\n", "[0, 1, 0, array([10.])]\n", "[0, 1, 0, array([10.])]\n", "[0, 1, 0, array([10.])]\n", "[0, 1, 0, array([10.])]\n", "[0, 1, 0, array([10.])]\n", "[0, 1, 0, array([10.])]\n", "[0, 1, 0, array([10.])]\n", "[0, 1, 0, array([10.])]\n", "[0, 1, 0, array([10.])]\n", "[0, 1, 0, array([10.])]\n", "[0, 1, 0, array([10.])]\n", "[0, 1, 0, array([10.])]\n", "[0, 1, 0, array([10.])]\n", "[0, 1, 0, array([10.])]\n", "[0, 1, 0, array([10.])]\n", "[0, 1, 0, array([10.])]\n", "[0, 1, 0, array([10.])]\n", "[0, 1, 0, array([10.])]\n", "[0, 1, 0, array([10.])]\n", "[0, 1, 0, array([10.])]\n", "[0, 1, 0, array([10.])]\n", "[0, 1, 0, array([10.])]\n", "[0, 1, 0, array([10.])]\n", "[0, 1, 0, array([10.])]\n", "[0, 1, 0, array([10.])]\n", "[0, 1, 0, array([10.])]\n", "[0, 1, 0, array([10.])]\n", "[0, 1, 0, array([10.])]\n", "[0, 1, 0, array([10.])]\n", "[0, 1, 0, array([10.])]\n", "[0, 1, 0, array([10.])]\n", "[0, 1, 0, array([10.])]\n", "[0, 1, 0, array([10.])]\n", "[0, 1, 0, array([10.])]\n", "[0, 1, 0, array([10.])]\n", "[0, 1, 0, array([10.])]\n", "[0, 1, 0, array([10.])]\n", "[0, 1, 0, array([10.])]\n", "[0, 1, 0, array([10.])]\n", "[0, 1, 0, array([10.])]\n", "[0, 1, 0, array([10.])]\n", "[0, 1, 0, array([10.])]\n", "[0, 1, 0, array([10.])]\n", "[0, 1, 0, array([10.])]\n", "[0, 1, 0, array([10.])]\n", "[0, 1, 0, array([10.])]\n", "[0, 1, 0, array([10.])]\n", "[0, 1, 0, array([10.])]\n", "[0, 1, 0, array([10.])]\n", "[0, 1, 0, array([10.])]\n", "[0, 1, 0, array([10.])]\n", "[0, 1, 0, array([10.])]\n", "[0, 1, 0, array([10.])]\n", "[0, 1, 0, array([10.])]\n", "[0, 1, 0, array([10.])]\n", "[0, 1, 0, array([10.])]\n", "[0, 1, 0, array([10.])]\n", "[0, 1, 0, array([10.])]\n", "[0, 1, 0, array([10.])]\n", "[0, 1, 0, array([10.])]\n", "[0, 1, 0, array([10.])]\n", "[0, 1, 0, array([10.])]\n", "[0, 1, 0, array([10.])]\n", "[0, 1, 0, array([10.])]\n", "[0, 1, 0, array([10.])]\n", "[0, 1, 0, array([10.])]\n", "[0, 1, 0, array([10.])]\n", "[0, 1, 0, array([10.])]\n", "[0, 1, 0, array([10.])]\n", "[0, 1, 0, array([10.])]\n", "[0, 1, 0, array([10.])]\n", "[0, 1, 0, array([10.])]\n", "[0, 1, 0, array([10.])]\n", "[0, 1, 0, array([10.])]\n", "[0, 1, 0, array([10.])]\n", "[0, 1, 0, array([10.])]\n", "[0, 1, 0, array([10.])]\n", "[0, 1, 0, array([10.])]\n", "[0, 1, 0, array([10.])]\n", "[0, 1, 0, array([10.])]\n", "[0, 1, 0, array([10.])]\n", "[0, 1, 0, array([10.])]\n", "[0, 1, 0, array([10.])]\n", "[0, 1, 0, array([10.])]\n", "[0, 1, 0, array([10.])]\n", "[0, 1, 0, array([10.])]\n", "[0, 1, 0, array([10.])]\n", "[0, 1, 0, array([10.])]\n", "[0, 1, 0, array([10.])]\n", "[0, 1, 0, array([10.])]\n", "[0, 1, 0, array([10.])]\n", "[0, 1, 0, array([10.])]\n", "[0, 1, 0, array([10.])]\n", "[0, 1, 0, array([10.])]\n", "[0, 1, 0, array([10.])]\n", "[0, 1, 0, array([10.])]\n", "[0, 1, 0, array([10.])]\n", "[0, 1, 0, array([10.])]\n", "[0, 1, 0, array([10.])]\n", "[0, 1, 0, array([10.])]\n", "[0, 1, 0, array([10.])]\n", "[0, 1, 0, array([10.])]\n", "[0, 1, 0, array([10.])]\n", "[0, 1, 0, array([10.])]\n", "[0, 1, 0, array([10.])]\n", "[0, 1, 0, array([10.])]\n", "[0, 1, 0, array([10.])]\n", "[0, 1, 0, array([10.])]\n", "[0, 1, 0, array([10.])]\n", "[0, 1, 0, array([10.])]\n", "[0, 1, 0, array([10.])]\n", "[0, 1, 0, array([10.])]\n", "[0, 1, 0, array([10.])]\n", "[0, 1, 0, array([10.])]\n", "[0, 1, 0, array([10.])]\n", "[0, 1, 0, array([10.])]\n", "[0, 1, 0, array([10.])]\n", "[0, 1, 0, array([10.])]\n", "[0, 1, 0, array([10.])]\n", "[0, 1, 0, array([10.])]\n", "[0, 1, 0, array([10.])]\n", "[0, 1, 0, array([10.])]\n", "[0, 1, 0, array([10.])]\n", "[0, 1, 0, array([10.])]\n", "[0, 1, 0, array([10.])]\n", "[0, 1, 0, array([10.])]\n", "[0, 1, 0, array([10.])]\n", "[0, 1, 0, array([10.])]\n", "[0, 1, 0, array([10.])]\n", "[0, 1, 0, array([10.])]\n", "[0, 1, 0, array([10.])]\n", "[0, 1, 0, array([10.])]\n", "[0, 1, 0, array([10.])]\n", "[0, 1, 0, array([10.])]\n", "[0, 1, 0, array([10.])]\n", "[0, 1, 0, array([10.])]\n", "[0, 1, 0, array([10.])]\n", "[0, 1, 0, array([10.])]\n", "[0, 1, 0, array([10.])]\n", "[0, 1, 0, array([10.])]\n", "[0, 1, 0, array([10.])]\n", "[0, 1, 0, array([10.])]\n", "[0, 1, 0, array([10.])]\n", "[0, 1, 0, array([10.])]\n", "[0, 1, 0, array([10.])]\n", "[0, 1, 0, array([10.])]\n", "[0, 1, 0, array([10.])]\n", "[0, 1, 0, array([10.])]\n", "[0, 1, 0, array([10.])]\n", "[0, 1, 0, array([10.])]\n", "[0, 1, 0, array([10.])]\n", "[0, 1, 0, array([10.])]\n", "[0, 1, 0, array([10.])]\n", "[0, 1, 0, array([10.])]\n", "[0, 1, 0, array([10.])]\n", "[0, 1, 0, array([10.])]\n", "[0, 1, 0, array([10.])]\n", "[0, 1, 0, array([10.])]\n", "[0, 1, 0, array([10.])]\n", "[0, 1, 0, array([10.])]\n", "[0, 1, 0, array([10.])]\n", "[0, 1, 0, array([10.])]\n", "[0, 1, 0, array([10.])]\n", "[0, 1, 0, array([10.])]\n", "[0, 1, 0, array([10.])]\n", "[0, 1, 0, array([10.])]\n", "[0, 1, 0, array([10.])]\n", "[0, 1, 0, array([10.])]\n", "[0, 1, 0, array([10.])]\n", "[0, 1, 0, array([10.])]\n", "[0, 1, 0, array([10.])]\n", "[0, 1, 0, array([10.])]\n", "[0, 1, 0, array([10.])]\n", "[0, 1, 0, array([10.])]\n", "[0, 1, 0, array([10.])]\n", "[0, 1, 0, array([10.])]\n", "[0, 1, 0, array([10.])]\n", "[0, 1, 0, array([10.])]\n", "[0, 1, 0, array([10.])]\n", "[0, 1, 0, array([10.])]\n", "[0, 1, 0, array([10.])]\n", "[0, 1, 0, array([10.])]\n", "[0, 1, 0, array([10.])]\n", "[0, 1, 0, array([10.])]\n", "[0, 1, 0, array([10.])]\n", "[0, 1, 0, array([10.])]\n", "[0, 1, 0, array([10.])]\n", "[0, 1, 0, array([10.])]\n", "[0, 1, 0, array([10.])]\n", "[0, 1, 0, array([10.])]\n", "[0, 1, 0, array([10.])]\n", "[0, 1, 0, array([10.])]\n", "[0, 1, 0, array([10.])]\n", "[0, 1, 0, array([10.])]\n", "[0, 1, 0, array([10.])]\n", "[0, 1, 0, array([10.])]\n", "[0, 1, 0, array([10.])]\n", "[0, 1, 0, array([10.])]\n", "[0, 1, 0, array([10.])]\n", "[0, 1, 0, array([10.])]\n", "[0, 1, 0, array([10.])]\n", "[0, 1, 0, array([10.])]\n", "[0, 1, 0, array([10.])]\n", "[0, 1, 0, array([10.])]\n", "[0, 1, 0, array([10.])]\n", "[0, 1, 0, array([10.])]\n", "[0, 1, 0, array([10.])]\n", "[0, 1, 0, array([10.])]\n", "[0, 1, 0, array([10.])]\n", "[0, 1, 0, array([10.])]\n", "[0, 1, 0, array([10.])]\n", "[0, 1, 0, array([10.])]\n", "[0, 1, 0, array([10.])]\n", "[0, 1, 0, array([10.])]\n", "[0, 1, 0, array([10.])]\n", "[0, 1, 0, array([10.])]\n", "[0, 1, 0, array([10.])]\n", "[0, 1, 0, array([10.])]\n", "[0, 1, 0, array([10.])]\n", "[0, 1, 0, array([10.])]\n", "[0, 1, 0, array([10.])]\n", "[0, 1, 0, array([10.])]\n", "[0, 1, 0, array([10.])]\n", "[0, 1, 0, array([10.])]\n", "[0, 1, 0, array([10.])]\n", "[0, 1, 0, array([10.])]\n", "[0, 1, 0, array([10.])]\n", "[0, 1, 0, array([10.])]\n", "[0, 1, 0, array([10.])]\n", "[0, 1, 0, array([10.])]\n", "[0, 1, 0, array([10.])]\n", "[0, 1, 0, array([10.])]\n" ] } ], "source": [ "bestScore = 200.0\n", "\n", "maxTotalReward = -99999999999\n", "\n", "for ep in range(MAX_EP):\n", " print(\"EP \", ep, \" START\")\n", " # first time run game\n", " s, _, _, _, _ = env.reset()\n", " if ep == 0:\n", " s = s.reshape([STATE_SIZE])\n", " step = 0\n", " done = False\n", "\n", " # save weight immediately?\n", " saveNow = 0\n", "\n", " epTotalReward = 0\n", " entropys = []\n", "\n", " while not done:\n", " step += 1\n", "\n", " actions, predictResult = agent.chooseAction(s)\n", " print(actions)\n", " avrEntropy, _, _ = agent.getAverageEntropy(predictResult)\n", " nextState, thisReward, done, _, saveNow = env.step(actions=actions)\n", "\n", " entropys.append(avrEntropy)\n", " ppoBuffer.saveBuffers(\n", " state=s, actorProb=predictResult, action=actions, reward=thisReward, done=done\n", " )\n", " epTotalReward += thisReward\n", "\n", " nextState = nextState.reshape([STATE_SIZE])\n", " s = nextState\n", "\n", " if done:\n", " print(\"EP OVER!\")\n", " if saveNow != 0:\n", " print(\"SAVENOW!\")\n", " saveNow = 0\n", " agent.saveWeights()\n", " # update PPO after Batch step or GameOver\n", " if (step + 1) % BATCH == 0 or done:\n", " if TRAIN:\n", " actorLosses, criticLosses = agent.trainCritcActor(\n", " states=ppoBuffer.getStates(),\n", " oldActorResult=ppoBuffer.getActorProbs(),\n", " actions=ppoBuffer.getActions(),\n", " rewards=ppoBuffer.getRewards(),\n", " dones=ppoBuffer.getDones(),\n", " nextState=nextState,\n", " epochs=EPOCHS,\n", " )\n", " ppoBuffer.clearBuffer()\n", " ppoHistory.saveHis(epTotalReward, np.mean(entropys), actorLosses, criticLosses)\n", "\n", " if epTotalReward > maxTotalReward and epTotalReward != 0:\n", " maxTotalReward = epTotalReward\n", " agent.saveWeights(epTotalReward)\n", " print(\"New Record! Save NN\", epTotalReward)\n" ] } ], "metadata": { "interpreter": { "hash": "86e2db13b09bd6be22cb599ea60c1572b9ef36ebeaa27a4c8e961d6df315ac32" }, "kernelspec": { "display_name": "Python 3.9.7 64-bit", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.9.7" }, "orig_nbformat": 4 }, "nbformat": 4, "nbformat_minor": 2 }