{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "c:\\Users\\UCUNI\\AppData\\Local\\Programs\\Python\\Python39\\lib\\site-packages\\requests\\__init__.py:102: RequestsDependencyWarning: urllib3 (1.26.12) or chardet (5.0.0)/charset_normalizer (2.0.6) doesn't match a supported version!\n",
      "  warnings.warn(\"urllib3 ({}) or chardet ({})/charset_normalizer ({}) doesn't match a supported \"\n"
     ]
    }
   ],
   "source": [
    "import numpy as np\n",
    "import tensorflow as tf\n",
    "import time\n",
    "import datetime\n",
    "import aimBotEnv\n",
    "\n",
    "from GAIL import GAIL\n",
    "from GAILConfig import GAILConfig\n",
    "from PPOConfig import PPOConfig\n",
    "from GAILMem import GAILMem\n",
    "from GAILHistory import GAILHistory\n",
    "from IPython.display import clear_output\n",
    "from tqdm.notebook import tqdm as tqdm"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Attempts to allocate only the GPU memory needed for allocation\n",
    "physical_devices = tf.config.list_physical_devices(\"GPU\")\n",
    "tf.config.experimental.set_memory_growth(physical_devices[0], True)\n",
    "tf.random.set_seed(9331)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "√√√√√Enviroment Initialized Success√√√√√\n",
      "√√√√√Buffer Initialized Success√√√√√\n",
      "√√√√√Buffer Initialized Success√√√√√\n",
      "---------thisPPO Params---------\n",
      "self.stateSize =  93\n",
      "self.disActShape =  [3, 3, 2]\n",
      "self.disActSize 3\n",
      "self.disOutputSize 8\n",
      "self.conActSize =  1\n",
      "self.conActRange =  10\n",
      "self.conOutputSize =  2\n",
      "---------thisPPO config---------\n",
      "self.NNShape =  [512, 512, 256]\n",
      "self.criticLR =  0.002\n",
      "self.actorLR =  0.002\n",
      "self.gamma =  0.99\n",
      "self.lmbda =  0.95\n",
      "self.clipRange =  0.2\n",
      "self.entropyWeight =  0.005\n",
      "self.trainEpochs =  5\n",
      "self.saveDir =  GAIL-Model/1027-2240/\n",
      "self.loadModelDir =  None\n",
      "---------Actor Model Create Success---------\n",
      "Model: \"model_1\"\n",
      "__________________________________________________________________________________________________\n",
      " Layer (type)                   Output Shape         Param #     Connected to                     \n",
      "==================================================================================================\n",
      " stateInput (InputLayer)        [(None, 93)]         0           []                               \n",
      "                                                                                                  \n",
      " dense0 (Dense)                 (None, 512)          48128       ['stateInput[0][0]']             \n",
      "                                                                                                  \n",
      " dense1 (Dense)                 (None, 512)          262656      ['dense0[0][0]']                 \n",
      "                                                                                                  \n",
      " dense2 (Dense)                 (None, 256)          131328      ['dense1[0][0]']                 \n",
      "                                                                                                  \n",
      " muOut (Dense)                  (None, 1)            257         ['dense2[0][0]']                 \n",
      "                                                                                                  \n",
      " sigmaOut (Dense)               (None, 1)            257         ['dense2[0][0]']                 \n",
      "                                                                                                  \n",
      " disAct0 (Dense)                (None, 3)            771         ['dense2[0][0]']                 \n",
      "                                                                                                  \n",
      " disAct1 (Dense)                (None, 3)            771         ['dense2[0][0]']                 \n",
      "                                                                                                  \n",
      " disAct2 (Dense)                (None, 2)            514         ['dense2[0][0]']                 \n",
      "                                                                                                  \n",
      " tf.math.multiply (TFOpLambda)  (None, 1)            0           ['muOut[0][0]']                  \n",
      "                                                                                                  \n",
      " tf.math.add (TFOpLambda)       (None, 1)            0           ['sigmaOut[0][0]']               \n",
      "                                                                                                  \n",
      " totalOut (Concatenate)         (None, 10)           0           ['disAct0[0][0]',                \n",
      "                                                                  'disAct1[0][0]',                \n",
      "                                                                  'disAct2[0][0]',                \n",
      "                                                                  'tf.math.multiply[0][0]',       \n",
      "                                                                  'tf.math.add[0][0]']            \n",
      "                                                                                                  \n",
      "==================================================================================================\n",
      "Total params: 444,682\n",
      "Trainable params: 444,682\n",
      "Non-trainable params: 0\n",
      "__________________________________________________________________________________________________\n",
      "---------Critic Model Create Success---------\n",
      "Model: \"model\"\n",
      "_________________________________________________________________\n",
      " Layer (type)                Output Shape              Param #   \n",
      "=================================================================\n",
      " stateInput (InputLayer)     [(None, 93)]              0         \n",
      "                                                                 \n",
      " dense0 (Dense)              (None, 512)               48128     \n",
      "                                                                 \n",
      " dense1 (Dense)              (None, 512)               262656    \n",
      "                                                                 \n",
      " dense2 (Dense)              (None, 256)               131328    \n",
      "                                                                 \n",
      " dense (Dense)               (None, 1)                 257       \n",
      "                                                                 \n",
      "=================================================================\n",
      "Total params: 442,369\n",
      "Trainable params: 442,369\n",
      "Non-trainable params: 0\n",
      "_________________________________________________________________\n"
     ]
    }
   ],
   "source": [
    "ENV_PATH = \"../Build-CloseEnemyCut/Aimbot-PPO\"\n",
    "EXPERT_DIR = \"../GAIL-Expert-Data/1015-0148/pack-53518.npz\"\n",
    "WORKER_ID = 1\n",
    "BASE_PORT = 200\n",
    "MAX_BUFFER_SIZE = 256\n",
    "\n",
    "MAX_EP = 1000000000\n",
    "STACKSTATESSIZE = 3\n",
    "STACKINTERCE = 29\n",
    "\n",
    "env = aimBotEnv.makeEnv(\n",
    "    envPath=ENV_PATH,\n",
    "    workerID=WORKER_ID,\n",
    "    basePort=BASE_PORT,\n",
    "    stackSize=STACKSTATESSIZE,\n",
    "    stackIntercal=STACKINTERCE,\n",
    ")\n",
    "\n",
    "STATE_SIZE = env.STATE_SIZE\n",
    "DISACT_SHAPE = env.DISCRETE_SHAPE\n",
    "CONACT_SIZE = env.CONTINUOUS_SIZE\n",
    "CONACT_RANGE = 10\n",
    "\n",
    "ppoConf = PPOConfig(\n",
    "    NNShape=[512, 512, 256],\n",
    "    actorLR=2e-3,\n",
    "    criticLR=2e-3,\n",
    "    gamma=0.99,\n",
    "    lmbda=0.95,\n",
    "    clipRange=0.20,\n",
    "    entropyWeight=5e-3,\n",
    "    trainEpochs=5,\n",
    "    saveDir=\"GAIL-Model/\" + datetime.datetime.now().strftime(\"%m%d-%H%M\") + \"/\",\n",
    "    loadModelDir=None,\n",
    ")\n",
    "gailConf = GAILConfig(\n",
    "    discrimNNShape=[256, 128],\n",
    "    discrimLR=1e-4,\n",
    "    discrimTrainEpochs=5,\n",
    "    discrimSaveDir=\"GAIL-Model/\" + datetime.datetime.now().strftime(\"%m%d-%H%M\") + \"/\",\n",
    "    ppoConfig=ppoConf\n",
    ")\n",
    "\n",
    "agentMem = GAILMem()\n",
    "expertMem = GAILMem()\n",
    "expertMem.loadMemFile(EXPERT_DIR)\n",
    "gailHis = GAILHistory()\n",
    "gail = GAIL(\n",
    "    stateSize=STATE_SIZE,\n",
    "    disActShape=DISACT_SHAPE,\n",
    "    conActSize=CONACT_SIZE,\n",
    "    conActRange=CONACT_RANGE,\n",
    "    gailConfig=gailConf,\n",
    ")\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "d1f8428c32f04f8da4a43a905fd9481c",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "  0%|          | 0/256 [00:00<?, ?it/s]"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "ename": "ValueError",
     "evalue": "not enough values to unpack (expected 5, got 2)",
     "output_type": "error",
     "traceback": [
      "\u001b[1;31m---------------------------------------------------------------------------\u001b[0m",
      "\u001b[1;31mValueError\u001b[0m                                Traceback (most recent call last)",
      "\u001b[1;32m~\\AppData\\Local\\Temp\\ipykernel_2500\\815726880.py\u001b[0m in \u001b[0;36m<cell line: 2>\u001b[1;34m()\u001b[0m\n\u001b[0;32m     33\u001b[0m     \u001b[0mdiscrimRewards\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mgail\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0minference\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0magentStates\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0magentActions\u001b[0m\u001b[1;33m)\u001b[0m \u001b[1;33m*\u001b[0m \u001b[1;36m10.0\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m     34\u001b[0m     \u001b[1;31m# train agentPPO\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m---> 35\u001b[1;33m     actorLosses, criticLosses, averageEntropy, discreteEntropys, continuousEntropys = gail.trainPPO(\n\u001b[0m\u001b[0;32m     36\u001b[0m         \u001b[0magentStates\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0magentActorProbs\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0magentActions\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mdiscrimRewards\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0magentDones\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mnextState\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m     37\u001b[0m     )\n",
      "\u001b[1;31mValueError\u001b[0m: not enough values to unpack (expected 5, got 2)"
     ]
    }
   ],
    "source": [
     "# Main GAIL loop: each iteration collects MAX_BUFFER_SIZE on-policy steps,\n",
     "# trains the discriminator (expert vs. agent), then trains PPO using the\n",
     "# scaled discriminator output as the reward signal.\n",
     "bestReward = 0\n",
     "for ep in range(MAX_EP):\n",
     "    # get sample\n",
     "    state, _, _, _, _ = env.reset()\n",
     "    totalRewards = []\n",
     "    totalReward = 0\n",
     "    saveNow = 0\n",
     "    for step in tqdm(range(MAX_BUFFER_SIZE)):\n",
     "        actions, predictResult = gail.getActions(state)\n",
     "        nextState, reward, done, _, saveNow = env.step(actions)\n",
     "        # store the transition for this iteration's on-policy updates\n",
     "        agentMem.saveMems(\n",
     "            state=state, actorProb=predictResult, action=actions, reward=reward, done=done\n",
     "        )\n",
     "        state = nextState\n",
     "        totalReward += reward\n",
     "        if done:\n",
     "            # episode finished mid-buffer: log its return and start a new one\n",
     "            totalRewards.append(totalReward)\n",
     "            totalReward = 0\n",
     "            state, _, _, _, _ = env.reset()\n",
     "    # add reward to history\n",
     "    # NOTE(review): this also records the trailing PARTIAL episode (0 if the\n",
     "    # final buffer step happened to end an episode), which biases\n",
     "    # np.mean(totalRewards) downward -- confirm this is intended.\n",
     "    totalRewards.append(totalReward)\n",
     "    # get all memory data\n",
     "    demoStates, _, demoActions, _, _ = expertMem.getRandomSample(MAX_BUFFER_SIZE)\n",
     "    agentStates = agentMem.getStates()\n",
     "    agentActions = agentMem.getActions()\n",
     "    agentActorProbs = agentMem.getActorProbs()\n",
     "    agentDones = agentMem.getDones()\n",
     "    # train the discriminator: expert demos vs. freshly collected agent data\n",
     "    discrimLosses, demoAcc, agentAcc = gail.trainDiscriminator(\n",
     "        demoStates, demoActions, agentStates, agentActions\n",
     "    )\n",
     "    # get discriminator-predicted rewards, scaled by 10 before PPO training\n",
     "    discrimRewards = gail.inference(agentStates, agentActions) * 10.0\n",
     "    # train agentPPO; nextState is the successor of the buffer's final step\n",
     "    actorLosses, criticLosses, averageEntropy = gail.trainPPO(\n",
     "        agentStates, agentActorProbs, agentActions, discrimRewards, agentDones, nextState\n",
     "    )\n",
     "    # persist per-iteration metrics and redraw the live training plots\n",
     "    gailHis.saveHis(\n",
     "        np.mean(totalRewards),\n",
     "        discrimLosses,\n",
     "        actorLosses,\n",
     "        criticLosses,\n",
     "        demoAcc,\n",
     "        agentAcc,\n",
     "        averageEntropy,\n",
     "        discrimRewards,\n",
     ")\n",
     "    clear_output()\n",
     "    gailHis.drawHis()\n",
     "    # got best reward?\n",
     "    if np.mean(totalRewards) >= bestReward:\n",
     "        bestReward = np.mean(totalRewards)\n",
     "        gail.saveWeights(np.mean(totalRewards))\n",
     "    # clear the on-policy buffer; PPO/GAIL updates must not reuse old rollouts\n",
     "    agentMem.clearMem()\n"
    ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3.9.7 64-bit",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.9.7"
  },
  "orig_nbformat": 4,
  "vscode": {
   "interpreter": {
    "hash": "86e2db13b09bd6be22cb599ea60c1572b9ef36ebeaa27a4c8e961d6df315ac32"
   }
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}