{
"cells": [
{
"cell_type": "code",
"execution_count": 1,
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"c:\\Users\\UCUNI\\AppData\\Local\\Programs\\Python\\Python39\\lib\\site-packages\\requests\\__init__.py:102: RequestsDependencyWarning: urllib3 (1.26.12) or chardet (5.0.0)/charset_normalizer (2.0.6) doesn't match a supported version!\n",
" warnings.warn(\"urllib3 ({}) or chardet ({})/charset_normalizer ({}) doesn't match a supported \"\n"
]
}
],
"source": [
"import numpy as np\n",
"import tensorflow as tf\n",
"import time\n",
"import datetime\n",
"import aimBotEnv\n",
"\n",
"from GAIL import GAIL\n",
"from GAILConfig import GAILConfig\n",
"from PPOConfig import PPOConfig\n",
"from GAILMem import GAILMem\n",
"from GAILHistory import GAILHistory\n",
"from IPython.display import clear_output\n",
"from tqdm.notebook import tqdm as tqdm"
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {},
"outputs": [],
"source": [
"# Attempts to allocate only the GPU memory needed for allocation\n",
"physical_devices = tf.config.list_physical_devices(\"GPU\")\n",
"tf.config.experimental.set_memory_growth(physical_devices[0], True)\n",
"tf.random.set_seed(9331)"
]
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"√√√√√Enviroment Initialized Success√√√√√\n",
"√√√√√Buffer Initialized Success√√√√√\n",
"√√√√√Buffer Initialized Success√√√√√\n",
"---------thisPPO Params---------\n",
"self.stateSize = 93\n",
"self.disActShape = [3, 3, 2]\n",
"self.disActSize 3\n",
"self.disOutputSize 8\n",
"self.conActSize = 1\n",
"self.conActRange = 10\n",
"self.conOutputSize = 2\n",
"---------thisPPO config---------\n",
"self.NNShape = [512, 512, 256]\n",
"self.criticLR = 0.002\n",
"self.actorLR = 0.002\n",
"self.gamma = 0.99\n",
"self.lmbda = 0.95\n",
"self.clipRange = 0.2\n",
"self.entropyWeight = 0.005\n",
"self.trainEpochs = 5\n",
"self.saveDir = GAIL-Model/1027-2240/\n",
"self.loadModelDir = None\n",
"---------Actor Model Create Success---------\n",
"Model: \"model_1\"\n",
"__________________________________________________________________________________________________\n",
" Layer (type) Output Shape Param # Connected to \n",
"==================================================================================================\n",
" stateInput (InputLayer) [(None, 93)] 0 [] \n",
" \n",
" dense0 (Dense) (None, 512) 48128 ['stateInput[0][0]'] \n",
" \n",
" dense1 (Dense) (None, 512) 262656 ['dense0[0][0]'] \n",
" \n",
" dense2 (Dense) (None, 256) 131328 ['dense1[0][0]'] \n",
" \n",
" muOut (Dense) (None, 1) 257 ['dense2[0][0]'] \n",
" \n",
" sigmaOut (Dense) (None, 1) 257 ['dense2[0][0]'] \n",
" \n",
" disAct0 (Dense) (None, 3) 771 ['dense2[0][0]'] \n",
" \n",
" disAct1 (Dense) (None, 3) 771 ['dense2[0][0]'] \n",
" \n",
" disAct2 (Dense) (None, 2) 514 ['dense2[0][0]'] \n",
" \n",
" tf.math.multiply (TFOpLambda) (None, 1) 0 ['muOut[0][0]'] \n",
" \n",
" tf.math.add (TFOpLambda) (None, 1) 0 ['sigmaOut[0][0]'] \n",
" \n",
" totalOut (Concatenate) (None, 10) 0 ['disAct0[0][0]', \n",
" 'disAct1[0][0]', \n",
" 'disAct2[0][0]', \n",
" 'tf.math.multiply[0][0]', \n",
" 'tf.math.add[0][0]'] \n",
" \n",
"==================================================================================================\n",
"Total params: 444,682\n",
"Trainable params: 444,682\n",
"Non-trainable params: 0\n",
"__________________________________________________________________________________________________\n",
"---------Critic Model Create Success---------\n",
"Model: \"model\"\n",
"_________________________________________________________________\n",
" Layer (type) Output Shape Param # \n",
"=================================================================\n",
" stateInput (InputLayer) [(None, 93)] 0 \n",
" \n",
" dense0 (Dense) (None, 512) 48128 \n",
" \n",
" dense1 (Dense) (None, 512) 262656 \n",
" \n",
" dense2 (Dense) (None, 256) 131328 \n",
" \n",
" dense (Dense) (None, 1) 257 \n",
" \n",
"=================================================================\n",
"Total params: 442,369\n",
"Trainable params: 442,369\n",
"Non-trainable params: 0\n",
"_________________________________________________________________\n"
]
}
],
"source": [
"ENV_PATH = \"../Build-CloseEnemyCut/Aimbot-PPO\"\n",
"EXPERT_DIR = \"../GAIL-Expert-Data/1015-0148/pack-53518.npz\"\n",
"WORKER_ID = 1\n",
"BASE_PORT = 200\n",
"MAX_BUFFER_SIZE = 256\n",
"\n",
"MAX_EP = 1000000000\n",
"STACKSTATESSIZE = 3\n",
"STACKINTERCE = 29\n",
"\n",
"env = aimBotEnv.makeEnv(\n",
" envPath=ENV_PATH,\n",
" workerID=WORKER_ID,\n",
" basePort=BASE_PORT,\n",
" stackSize=STACKSTATESSIZE,\n",
" stackIntercal=STACKINTERCE,\n",
")\n",
"\n",
"STATE_SIZE = env.STATE_SIZE\n",
"DISACT_SHAPE = env.DISCRETE_SHAPE\n",
"CONACT_SIZE = env.CONTINUOUS_SIZE\n",
"CONACT_RANGE = 10\n",
"\n",
"ppoConf = PPOConfig(\n",
" NNShape=[512, 512, 256],\n",
" actorLR=2e-3,\n",
" criticLR=2e-3,\n",
" gamma=0.99,\n",
" lmbda=0.95,\n",
" clipRange=0.20,\n",
" entropyWeight=5e-3,\n",
" trainEpochs=5,\n",
" saveDir=\"GAIL-Model/\" + datetime.datetime.now().strftime(\"%m%d-%H%M\") + \"/\",\n",
" loadModelDir=None,\n",
")\n",
"gailConf = GAILConfig(\n",
" discrimNNShape=[256, 128],\n",
" discrimLR=1e-4,\n",
" discrimTrainEpochs=5,\n",
" discrimSaveDir=\"GAIL-Model/\" + datetime.datetime.now().strftime(\"%m%d-%H%M\") + \"/\",\n",
" ppoConfig=ppoConf\n",
")\n",
"\n",
"agentMem = GAILMem()\n",
"expertMem = GAILMem()\n",
"expertMem.loadMemFile(EXPERT_DIR)\n",
"gailHis = GAILHistory()\n",
"gail = GAIL(\n",
" stateSize=STATE_SIZE,\n",
" disActShape=DISACT_SHAPE,\n",
" conActSize=CONACT_SIZE,\n",
" conActRange=CONACT_RANGE,\n",
" gailConfig=gailConf,\n",
")\n"
]
},
{
"cell_type": "code",
"execution_count": 4,
"metadata": {},
"outputs": [
{
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "d1f8428c32f04f8da4a43a905fd9481c",
"version_major": 2,
"version_minor": 0
},
"text/plain": [
" 0%| | 0/256 [00:00<?, ?it/s]"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"ename": "ValueError",
"evalue": "not enough values to unpack (expected 5, got 2)",
"output_type": "error",
"traceback": [
"\u001b[1;31m---------------------------------------------------------------------------\u001b[0m",
"\u001b[1;31mValueError\u001b[0m Traceback (most recent call last)",
"\u001b[1;32m~\\AppData\\Local\\Temp\\ipykernel_2500\\815726880.py\u001b[0m in \u001b[0;36m<cell line: 2>\u001b[1;34m()\u001b[0m\n\u001b[0;32m 33\u001b[0m \u001b[0mdiscrimRewards\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mgail\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0minference\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0magentStates\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0magentActions\u001b[0m\u001b[1;33m)\u001b[0m \u001b[1;33m*\u001b[0m \u001b[1;36m10.0\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 34\u001b[0m \u001b[1;31m# train agentPPO\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m---> 35\u001b[1;33m actorLosses, criticLosses, averageEntropy, discreteEntropys, continuousEntropys = gail.trainPPO(\n\u001b[0m\u001b[0;32m 36\u001b[0m \u001b[0magentStates\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0magentActorProbs\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0magentActions\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mdiscrimRewards\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0magentDones\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mnextState\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 37\u001b[0m )\n",
"\u001b[1;31mValueError\u001b[0m: not enough values to unpack (expected 5, got 2)"
]
}
],
"source": [
"bestReward = 0\n",
"for ep in range(MAX_EP):\n",
" # get sample\n",
" state, _, _, _, _ = env.reset()\n",
" totalRewards = []\n",
" totalReward = 0\n",
" saveNow = 0\n",
" for step in tqdm(range(MAX_BUFFER_SIZE)):\n",
" actions, predictResult = gail.getActions(state)\n",
" nextState, reward, done, _, saveNow = env.step(actions)\n",
" agentMem.saveMems(\n",
" state=state, actorProb=predictResult, action=actions, reward=reward, done=done\n",
" )\n",
" state = nextState\n",
" totalReward += reward\n",
" if done:\n",
" totalRewards.append(totalReward)\n",
" totalReward = 0\n",
" state, _, _, _, _ = env.reset()\n",
" # add reward to history\n",
" totalRewards.append(totalReward)\n",
" # get all memory data\n",
" demoStates, _, demoActions, _, _ = expertMem.getRandomSample(MAX_BUFFER_SIZE)\n",
" agentStates = agentMem.getStates()\n",
" agentActions = agentMem.getActions()\n",
" agentActorProbs = agentMem.getActorProbs()\n",
" agentDones = agentMem.getDones()\n",
" # train discriminator\n",
" discrimLosses, demoAcc, agentAcc = gail.trainDiscriminator(\n",
" demoStates, demoActions, agentStates, agentActions\n",
" )\n",
" # get disriminator predict rewards\n",
" discrimRewards = gail.inference(agentStates, agentActions) * 10.0\n",
" # train agentPPO\n",
" actorLosses, criticLosses, averageEntropy = gail.trainPPO(\n",
" agentStates, agentActorProbs, agentActions, discrimRewards, agentDones, nextState\n",
" )\n",
" gailHis.saveHis(\n",
" np.mean(totalRewards),\n",
" discrimLosses,\n",
" actorLosses,\n",
" criticLosses,\n",
" demoAcc,\n",
" agentAcc,\n",
" averageEntropy,\n",
" discrimRewards,\n",
" )\n",
" clear_output()\n",
" gailHis.drawHis()\n",
" # got best reward?\n",
" if np.mean(totalRewards) >= bestReward:\n",
" bestReward = np.mean(totalRewards)\n",
" gail.saveWeights(np.mean(totalRewards))\n",
" agentMem.clearMem()\n"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3.9.7 64-bit",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.9.7"
},
"orig_nbformat": 4,
"vscode": {
"interpreter": {
"hash": "86e2db13b09bd6be22cb599ea60c1572b9ef36ebeaa27a4c8e961d6df315ac32"
}
}
},
"nbformat": 4,
"nbformat_minor": 2
}