GAIL class, human action recording, stack-state feature added
Add the GAIL, GAILMem, and GAILConfig classes. Add HumanAction recording to save expert data. Add a stackState feature that stacks multiple past states so the agent knows what happened before.
This commit is contained in:
parent
ae8a1ba8e2
commit
2a498f18f6
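
Taken together, the new pieces are wired up roughly like this (a minimal sketch assembled from the files added below; the sizes and the pack filename are illustrative assumptions, and PPO/PPOConfig come from the project's existing files):

from GAIL import GAIL
from GAILConfig import GAILConfig
from GAILMem import GAILMem

STATE_SIZE = 90 * 3        # assumed: single-state size x stacked states
DIS_ACT_SHAPE = [3, 3, 2]  # assumed discrete action branches
CON_ACT_SIZE = 1           # continuous action: mouse x movement
CON_ACT_RANGE = 10.0       # assumed continuous action range

gail = GAIL(STATE_SIZE, DIS_ACT_SHAPE, CON_ACT_SIZE, CON_ACT_RANGE, GAILConfig())

expertMem = GAILMem()
expertMem.loadMemFile("GAIL-Expert-Data/pack-500.npz")  # assumed pack file recorded via HumanActions + GAILMem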
4
.gitignore
vendored
@@ -74,8 +74,10 @@ crashlytics-build.properties

# Python Folder
/Aimbot-PPO-Python/.vscode/
/Aimbot-PPO-Python/.mypy_cache/
/Aimbot-PPO-Python/__pycache__/
/Aimbot-PPO-Python/Backup/
/Aimbot-PPO-Python/Build-MultiScene-WithLoad/
/Aimbot-PPO-Python/Build-CloseEnemyCut/
/Aimbot-PPO-Python/PPO-Model/
/Aimbot-PPO-Python/PPO-Model/
/Aimbot-PPO-Python/GAIL-Expert-Data/
@@ -9,13 +9,15 @@
"name": "stdout",
"output_type": "stream",
"text": [
"√√√√√Enviroment Initialized Success√√√√√\n"
"√√√√√Enviroment Initialized Success√√√√√\n",
"√√√√√Buffer Initialized Success√√√√√\n"
]
}
],
"source": [
"import time\n",
"import aimBotEnv\n",
"from GAILMem import GAILMem\n",
"from HumanAction import HumanActions\n",
"\n",
"# Env\n",
@@ -23,10 +25,21 @@
"WORKER_ID = 1\n",
"BASE_PORT = 200\n",
"\n",
"MOUSEDISCOUNT = 8.0\n",
"# ENV Para\n",
"MOUSEDISCOUNT = 20.0\n",
"MAX_EP = 10000000\n",
"STACKSTATESIZE = 3\n",
"STACKINTERCE = 29\n",
"\n",
"env = aimBotEnv.makeEnv(envPath=ENV_PATH, workerID=WORKER_ID, basePort=BASE_PORT)\n"
"env = aimBotEnv.makeEnv(\n",
"    envPath=ENV_PATH,\n",
"    workerID=WORKER_ID,\n",
"    basePort=BASE_PORT,\n",
"    stackSize=STACKSTATESIZE,\n",
"    stackIntercal=STACKINTERCE,\n",
")\n",
"demoMem = GAILMem()\n",
"demoAct = HumanActions(mouseDiscount=MOUSEDISCOUNT)\n"
]
},
{
@@ -34,6 +47,513 @@
"execution_count": 2,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"EP Start\n",
|
||||
"EP Start\n",
|
||||
"EP Start\n",
|
||||
"nowMemNum 743\n",
|
||||
"lastMemCheckPoint 1\n",
|
||||
"mem_saved\n",
|
||||
"EP Start\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"name": "stderr",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"c:\\Users\\UCUNI\\OneDrive\\Unity\\ML-Agents\\Aimbot-PPO\\Aimbot-PPO-Python\\GAILMem.py:33: VisibleDeprecationWarning: Creating an ndarray from ragged nested sequences (which is a list-or-tuple of lists-or-tuples-or ndarrays with different lengths or shapes) is deprecated. If you meant to do this, you must specify 'dtype=object' when creating the ndarray.\n",
|
||||
" actionsNP = np.asarray(self.actions)\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"nowMemNum 993\n",
|
||||
"lastMemCheckPoint 1\n",
|
||||
"mem_saved\n",
|
||||
"EP Start\n",
|
||||
"nowMemNum 1199\n",
|
||||
"lastMemCheckPoint 1\n",
|
||||
"mem_saved\n",
|
||||
"EP Start\n",
|
||||
"nowMemNum 1426\n",
|
||||
"lastMemCheckPoint 1\n",
|
||||
"mem_saved\n",
|
||||
"EP Start\n",
|
||||
"nowMemNum 1671\n",
|
||||
"lastMemCheckPoint 1\n",
|
||||
"mem_saved\n",
|
||||
"EP Start\n",
|
||||
"nowMemNum 1890\n",
|
||||
"lastMemCheckPoint 1\n",
|
||||
"mem_saved\n",
|
||||
"EP Start\n",
|
||||
"nowMemNum 2097\n",
|
||||
"lastMemCheckPoint 1\n",
|
||||
"mem_saved\n",
|
||||
"EP Start\n",
|
||||
"nowMemNum 2307\n",
|
||||
"lastMemCheckPoint 1\n",
|
||||
"mem_saved\n",
|
||||
"EP Start\n",
|
||||
"nowMemNum 2510\n",
|
||||
"lastMemCheckPoint 1\n",
|
||||
"mem_saved\n",
|
||||
"EP Start\n",
|
||||
"nowMemNum 2710\n",
|
||||
"lastMemCheckPoint 1\n",
|
||||
"mem_saved\n",
|
||||
"EP Start\n",
|
||||
"nowMemNum 2889\n",
|
||||
"lastMemCheckPoint 1\n",
|
||||
"mem_saved\n",
|
||||
"EP Start\n",
|
||||
"nowMemNum 3079\n",
|
||||
"lastMemCheckPoint 1\n",
|
||||
"mem_saved\n",
|
||||
"EP Start\n",
|
||||
"nowMemNum 3263\n",
|
||||
"lastMemCheckPoint 1\n",
|
||||
"mem_saved\n",
|
||||
"EP Start\n",
|
||||
"nowMemNum 3506\n",
|
||||
"lastMemCheckPoint 1\n",
|
||||
"mem_saved\n",
|
||||
"EP Start\n",
|
||||
"nowMemNum 3764\n",
|
||||
"lastMemCheckPoint 1\n",
|
||||
"mem_saved\n",
|
||||
"EP Start\n",
|
||||
"nowMemNum 3982\n",
|
||||
"lastMemCheckPoint 1\n",
|
||||
"mem_saved\n",
|
||||
"EP Start\n",
|
||||
"nowMemNum 4155\n",
|
||||
"lastMemCheckPoint 1\n",
|
||||
"mem_saved\n",
|
||||
"EP Start\n",
|
||||
"nowMemNum 4338\n",
|
||||
"lastMemCheckPoint 1\n",
|
||||
"mem_saved\n",
|
||||
"EP Start\n",
|
||||
"nowMemNum 4530\n",
|
||||
"lastMemCheckPoint 1\n",
|
||||
"mem_saved\n",
|
||||
"EP Start\n",
|
||||
"nowMemNum 4749\n",
|
||||
"lastMemCheckPoint 1\n",
|
||||
"mem_saved\n",
|
||||
"EP Start\n",
|
||||
"nowMemNum 4979\n",
|
||||
"lastMemCheckPoint 1\n",
|
||||
"mem_saved\n",
|
||||
"EP Start\n",
|
||||
"nowMemNum 5159\n",
|
||||
"lastMemCheckPoint 1\n",
|
||||
"mem_saved\n",
|
||||
"EP Start\n",
|
||||
"nowMemNum 5358\n",
|
||||
"lastMemCheckPoint 1\n",
|
||||
"mem_saved\n",
|
||||
"EP Start\n",
|
||||
"nowMemNum 5641\n",
|
||||
"lastMemCheckPoint 1\n",
|
||||
"mem_saved\n",
|
||||
"EP Start\n",
|
||||
"nowMemNum 5887\n",
|
||||
"lastMemCheckPoint 1\n",
|
||||
"mem_saved\n",
|
||||
"EP Start\n",
|
||||
"nowMemNum 6085\n",
|
||||
"lastMemCheckPoint 1\n",
|
||||
"mem_saved\n",
|
||||
"EP Start\n",
|
||||
"nowMemNum 6312\n",
|
||||
"lastMemCheckPoint 1\n",
|
||||
"mem_saved\n",
|
||||
"EP Start\n",
|
||||
"nowMemNum 6471\n",
|
||||
"lastMemCheckPoint 1\n",
|
||||
"mem_saved\n",
|
||||
"EP Start\n",
|
||||
"nowMemNum 6691\n",
|
||||
"lastMemCheckPoint 1\n",
|
||||
"mem_saved\n",
|
||||
"EP Start\n",
|
||||
"nowMemNum 6885\n",
|
||||
"lastMemCheckPoint 1\n",
|
||||
"mem_saved\n",
|
||||
"EP Start\n",
|
||||
"nowMemNum 7086\n",
|
||||
"lastMemCheckPoint 1\n",
|
||||
"mem_saved\n",
|
||||
"EP Start\n",
|
||||
"nowMemNum 7248\n",
|
||||
"lastMemCheckPoint 1\n",
|
||||
"mem_saved\n",
|
||||
"EP Start\n",
|
||||
"nowMemNum 7437\n",
|
||||
"lastMemCheckPoint 1\n",
|
||||
"mem_saved\n",
|
||||
"EP Start\n",
|
||||
"nowMemNum 7608\n",
|
||||
"lastMemCheckPoint 1\n",
|
||||
"mem_saved\n",
|
||||
"EP Start\n",
|
||||
"nowMemNum 7788\n",
|
||||
"lastMemCheckPoint 1\n",
|
||||
"mem_saved\n",
|
||||
"EP Start\n",
|
||||
"nowMemNum 8020\n",
|
||||
"lastMemCheckPoint 1\n",
|
||||
"mem_saved\n",
|
||||
"EP Start\n",
|
||||
"nowMemNum 8193\n",
|
||||
"lastMemCheckPoint 1\n",
|
||||
"mem_saved\n",
|
||||
"EP Start\n",
|
||||
"nowMemNum 8447\n",
|
||||
"lastMemCheckPoint 1\n",
|
||||
"mem_saved\n",
|
||||
"EP Start\n",
|
||||
"nowMemNum 8675\n",
|
||||
"lastMemCheckPoint 1\n",
|
||||
"mem_saved\n",
|
||||
"EP Start\n",
|
||||
"nowMemNum 8869\n",
|
||||
"lastMemCheckPoint 1\n",
|
||||
"mem_saved\n",
|
||||
"EP Start\n",
|
||||
"nowMemNum 9046\n",
|
||||
"lastMemCheckPoint 1\n",
|
||||
"mem_saved\n",
|
||||
"EP Start\n",
|
||||
"nowMemNum 9260\n",
|
||||
"lastMemCheckPoint 1\n",
|
||||
"mem_saved\n",
|
||||
"EP Start\n",
|
||||
"nowMemNum 9469\n",
|
||||
"lastMemCheckPoint 1\n",
|
||||
"mem_saved\n",
|
||||
"EP Start\n",
|
||||
"nowMemNum 9633\n",
|
||||
"lastMemCheckPoint 1\n",
|
||||
"mem_saved\n",
|
||||
"EP Start\n",
|
||||
"nowMemNum 9802\n",
|
||||
"lastMemCheckPoint 1\n",
|
||||
"mem_saved\n",
|
||||
"EP Start\n",
|
||||
"nowMemNum 10019\n",
|
||||
"lastMemCheckPoint 1\n",
|
||||
"mem_saved\n",
|
||||
"EP Start\n",
|
||||
"nowMemNum 10205\n",
|
||||
"lastMemCheckPoint 1\n",
|
||||
"mem_saved\n",
|
||||
"EP Start\n",
|
||||
"nowMemNum 10387\n",
|
||||
"lastMemCheckPoint 1\n",
|
||||
"mem_saved\n",
|
||||
"EP Start\n",
|
||||
"nowMemNum 10657\n",
|
||||
"lastMemCheckPoint 1\n",
|
||||
"mem_saved\n",
|
||||
"EP Start\n",
|
||||
"nowMemNum 10834\n",
|
||||
"lastMemCheckPoint 1\n",
|
||||
"mem_saved\n",
|
||||
"EP Start\n",
|
||||
"nowMemNum 11071\n",
|
||||
"lastMemCheckPoint 1\n",
|
||||
"mem_saved\n",
|
||||
"EP Start\n",
|
||||
"nowMemNum 11284\n",
|
||||
"lastMemCheckPoint 1\n",
|
||||
"mem_saved\n",
|
||||
"EP Start\n",
|
||||
"nowMemNum 11516\n",
|
||||
"lastMemCheckPoint 1\n",
|
||||
"mem_saved\n",
|
||||
"EP Start\n",
|
||||
"nowMemNum 11735\n",
|
||||
"lastMemCheckPoint 1\n",
|
||||
"mem_saved\n",
|
||||
"EP Start\n",
|
||||
"nowMemNum 11948\n",
|
||||
"lastMemCheckPoint 1\n",
|
||||
"mem_saved\n",
|
||||
"EP Start\n",
|
||||
"nowMemNum 12157\n",
|
||||
"lastMemCheckPoint 1\n",
|
||||
"mem_saved\n",
|
||||
"EP Start\n",
|
||||
"nowMemNum 12330\n",
|
||||
"lastMemCheckPoint 1\n",
|
||||
"mem_saved\n",
|
||||
"EP Start\n",
|
||||
"nowMemNum 12565\n",
|
||||
"lastMemCheckPoint 1\n",
|
||||
"mem_saved\n",
|
||||
"EP Start\n",
|
||||
"nowMemNum 12768\n",
|
||||
"lastMemCheckPoint 1\n",
|
||||
"mem_saved\n",
|
||||
"EP Start\n",
|
||||
"nowMemNum 12944\n",
|
||||
"lastMemCheckPoint 1\n",
|
||||
"mem_saved\n",
|
||||
"EP Start\n",
|
||||
"nowMemNum 13129\n",
|
||||
"lastMemCheckPoint 1\n",
|
||||
"mem_saved\n",
|
||||
"EP Start\n",
|
||||
"nowMemNum 13292\n",
|
||||
"lastMemCheckPoint 1\n",
|
||||
"mem_saved\n",
|
||||
"EP Start\n",
|
||||
"nowMemNum 13590\n",
|
||||
"lastMemCheckPoint 1\n",
|
||||
"mem_saved\n",
|
||||
"EP Start\n",
|
||||
"nowMemNum 13765\n",
|
||||
"lastMemCheckPoint 1\n",
|
||||
"mem_saved\n",
|
||||
"EP Start\n",
|
||||
"nowMemNum 13921\n",
|
||||
"lastMemCheckPoint 1\n",
|
||||
"mem_saved\n",
|
||||
"EP Start\n",
|
||||
"nowMemNum 14083\n",
|
||||
"lastMemCheckPoint 1\n",
|
||||
"mem_saved\n",
|
||||
"EP Start\n",
|
||||
"nowMemNum 14254\n",
|
||||
"lastMemCheckPoint 1\n",
|
||||
"mem_saved\n",
|
||||
"EP Start\n",
|
||||
"nowMemNum 14445\n",
|
||||
"lastMemCheckPoint 1\n",
|
||||
"mem_saved\n",
|
||||
"EP Start\n",
|
||||
"nowMemNum 14662\n",
|
||||
"lastMemCheckPoint 1\n",
|
||||
"mem_saved\n",
|
||||
"EP Start\n",
|
||||
"nowMemNum 14833\n",
|
||||
"lastMemCheckPoint 1\n",
|
||||
"mem_saved\n",
|
||||
"EP Start\n",
|
||||
"nowMemNum 15056\n",
|
||||
"lastMemCheckPoint 1\n",
|
||||
"mem_saved\n",
|
||||
"EP Start\n",
|
||||
"nowMemNum 15258\n",
|
||||
"lastMemCheckPoint 1\n",
|
||||
"mem_saved\n",
|
||||
"EP Start\n",
|
||||
"nowMemNum 15425\n",
|
||||
"lastMemCheckPoint 1\n",
|
||||
"mem_saved\n",
|
||||
"EP Start\n",
|
||||
"nowMemNum 15590\n",
|
||||
"lastMemCheckPoint 1\n",
|
||||
"mem_saved\n",
|
||||
"EP Start\n",
|
||||
"nowMemNum 15829\n",
|
||||
"lastMemCheckPoint 1\n",
|
||||
"mem_saved\n",
|
||||
"EP Start\n",
|
||||
"nowMemNum 16057\n",
|
||||
"lastMemCheckPoint 1\n",
|
||||
"mem_saved\n",
|
||||
"EP Start\n",
|
||||
"nowMemNum 16237\n",
|
||||
"lastMemCheckPoint 1\n",
|
||||
"mem_saved\n",
|
||||
"EP Start\n",
|
||||
"nowMemNum 16411\n",
|
||||
"lastMemCheckPoint 1\n",
|
||||
"mem_saved\n",
|
||||
"EP Start\n",
|
||||
"nowMemNum 16612\n",
|
||||
"lastMemCheckPoint 1\n",
|
||||
"mem_saved\n",
|
||||
"EP Start\n",
|
||||
"nowMemNum 16812\n",
|
||||
"lastMemCheckPoint 1\n",
|
||||
"mem_saved\n",
|
||||
"EP Start\n",
|
||||
"nowMemNum 17001\n",
|
||||
"lastMemCheckPoint 1\n",
|
||||
"mem_saved\n",
|
||||
"EP Start\n",
|
||||
"nowMemNum 17173\n",
|
||||
"lastMemCheckPoint 1\n",
|
||||
"mem_saved\n",
|
||||
"EP Start\n",
|
||||
"nowMemNum 17342\n",
|
||||
"lastMemCheckPoint 1\n",
|
||||
"mem_saved\n",
|
||||
"EP Start\n",
|
||||
"nowMemNum 17515\n",
|
||||
"lastMemCheckPoint 1\n",
|
||||
"mem_saved\n",
|
||||
"EP Start\n",
|
||||
"nowMemNum 17715\n",
|
||||
"lastMemCheckPoint 1\n",
|
||||
"mem_saved\n",
|
||||
"EP Start\n",
|
||||
"nowMemNum 17890\n",
|
||||
"lastMemCheckPoint 1\n",
|
||||
"mem_saved\n",
|
||||
"EP Start\n",
|
||||
"nowMemNum 18072\n",
|
||||
"lastMemCheckPoint 1\n",
|
||||
"mem_saved\n",
|
||||
"EP Start\n",
|
||||
"nowMemNum 18261\n",
|
||||
"lastMemCheckPoint 1\n",
|
||||
"mem_saved\n",
|
||||
"EP Start\n",
|
||||
"nowMemNum 18489\n",
|
||||
"lastMemCheckPoint 1\n",
|
||||
"mem_saved\n",
|
||||
"EP Start\n",
|
||||
"nowMemNum 18701\n",
|
||||
"lastMemCheckPoint 1\n",
|
||||
"mem_saved\n",
|
||||
"EP Start\n",
|
||||
"nowMemNum 18886\n",
|
||||
"lastMemCheckPoint 1\n",
|
||||
"mem_saved\n",
|
||||
"EP Start\n",
|
||||
"nowMemNum 19100\n",
|
||||
"lastMemCheckPoint 1\n",
|
||||
"mem_saved\n",
|
||||
"EP Start\n",
|
||||
"nowMemNum 19318\n",
|
||||
"lastMemCheckPoint 1\n",
|
||||
"mem_saved\n",
|
||||
"EP Start\n",
|
||||
"nowMemNum 19487\n",
|
||||
"lastMemCheckPoint 1\n",
|
||||
"mem_saved\n",
|
||||
"EP Start\n",
|
||||
"nowMemNum 19670\n",
|
||||
"lastMemCheckPoint 1\n",
|
||||
"mem_saved\n",
|
||||
"EP Start\n",
|
||||
"nowMemNum 19881\n",
|
||||
"lastMemCheckPoint 1\n",
|
||||
"mem_saved\n",
|
||||
"EP Start\n",
|
||||
"nowMemNum 20041\n",
|
||||
"lastMemCheckPoint 1\n",
|
||||
"mem_saved\n",
|
||||
"EP Start\n",
|
||||
"nowMemNum 20279\n",
|
||||
"lastMemCheckPoint 1\n",
|
||||
"mem_saved\n",
|
||||
"EP Start\n",
|
||||
"nowMemNum 20491\n",
|
||||
"lastMemCheckPoint 1\n",
|
||||
"mem_saved\n",
|
||||
"EP Start\n",
|
||||
"nowMemNum 20679\n",
|
||||
"lastMemCheckPoint 1\n",
|
||||
"mem_saved\n",
|
||||
"EP Start\n",
|
||||
"nowMemNum 20877\n",
|
||||
"lastMemCheckPoint 1\n",
|
||||
"mem_saved\n",
|
||||
"EP Start\n",
|
||||
"nowMemNum 21070\n",
|
||||
"lastMemCheckPoint 1\n",
|
||||
"mem_saved\n",
|
||||
"EP Start\n",
|
||||
"nowMemNum 21305\n",
|
||||
"lastMemCheckPoint 1\n",
|
||||
"mem_saved\n",
|
||||
"EP Start\n",
|
||||
"nowMemNum 21519\n",
|
||||
"lastMemCheckPoint 1\n",
|
||||
"mem_saved\n",
|
||||
"EP Start\n",
|
||||
"nowMemNum 21760\n",
|
||||
"lastMemCheckPoint 1\n",
|
||||
"mem_saved\n",
|
||||
"EP Start\n",
|
||||
"nowMemNum 21936\n",
|
||||
"lastMemCheckPoint 1\n",
|
||||
"mem_saved\n",
|
||||
"EP Start\n",
|
||||
"nowMemNum 22135\n",
|
||||
"lastMemCheckPoint 1\n",
|
||||
"mem_saved\n",
|
||||
"EP Start\n",
|
||||
"nowMemNum 22304\n",
|
||||
"lastMemCheckPoint 1\n",
|
||||
"mem_saved\n",
|
||||
"EP Start\n",
|
||||
"nowMemNum 22512\n",
|
||||
"lastMemCheckPoint 1\n",
|
||||
"mem_saved\n",
|
||||
"EP Start\n",
|
||||
"nowMemNum 22706\n",
|
||||
"lastMemCheckPoint 1\n",
|
||||
"mem_saved\n",
|
||||
"EP Start\n",
|
||||
"nowMemNum 22882\n",
|
||||
"lastMemCheckPoint 1\n",
|
||||
"mem_saved\n",
|
||||
"EP Start\n",
|
||||
"nowMemNum 23123\n",
|
||||
"lastMemCheckPoint 1\n",
|
||||
"mem_saved\n",
|
||||
"EP Start\n",
|
||||
"nowMemNum 23290\n",
|
||||
"lastMemCheckPoint 1\n",
|
||||
"mem_saved\n",
|
||||
"EP Start\n",
|
||||
"nowMemNum 23453\n",
|
||||
"lastMemCheckPoint 1\n",
|
||||
"mem_saved\n",
|
||||
"EP Start\n",
|
||||
"nowMemNum 23707\n",
|
||||
"lastMemCheckPoint 1\n",
|
||||
"mem_saved\n",
|
||||
"EP Start\n",
|
||||
"nowMemNum 23942\n",
|
||||
"lastMemCheckPoint 1\n",
|
||||
"mem_saved\n",
|
||||
"EP Start\n",
|
||||
"nowMemNum 24153\n",
|
||||
"lastMemCheckPoint 1\n",
|
||||
"mem_saved\n",
|
||||
"EP Start\n",
|
||||
"nowMemNum 24346\n",
|
||||
"lastMemCheckPoint 1\n",
|
||||
"mem_saved\n",
|
||||
"EP Start\n",
|
||||
"nowMemNum 24573\n",
|
||||
"lastMemCheckPoint 1\n",
|
||||
"mem_saved\n",
|
||||
"EP Start\n",
|
||||
"nowMemNum 24757\n",
|
||||
"lastMemCheckPoint 1\n",
|
||||
"mem_saved\n",
|
||||
"EP Start\n",
|
||||
"nowMemNum 24957\n",
|
||||
"lastMemCheckPoint 1\n",
|
||||
"mem_saved\n",
|
||||
"EP Start\n"
|
||||
]
|
||||
},
{
"ename": "UnityCommunicatorStoppedException",
"evalue": "Communicator has exited.",
@@ -41,8 +561,8 @@
"traceback": [
"---------------------------------------------------------------------------",
"UnityCommunicatorStoppedException        Traceback (most recent call last)",
"~\\AppData\\Local\\Temp/ipykernel_37248/645561173.py in <module>\n      5 while not done:\n      6     actions = demoAct.getHumanActions()\n----> 7     env.step(actions=actions)\n",
"c:\\Users\\UCUNI\\OneDrive\\Unity\\ML-Agents\\Aimbot-PPO\\Aimbot-PPO-Python\\aimBotEnv.py in step(self, actions, behaviorName, trackedAgent)\n     72     # take action to env\n     73     self.env.set_actions(behavior_name=behaviorName, action=thisActionTuple)\n---> 74     self.env.step()\n     75     # get nextState & reward & done after this action\n     76     nextState, reward, done, loadDir, saveNow = self.getSteps(behaviorName, trackedAgent)\n",
"~\\AppData\\Local\\Temp/ipykernel_19308/2258777724.py in <module>\n      7 while not done:\n      8     actions = demoAct.getHumanActions()\n----> 9     nextState, _, done, _, _ = env.step(actions=actions)\n     10     demoMem.saveMems(state=state, actorProb=None, action=actions, reward=None, done=None)\n     11     state = nextState\n",
"c:\\Users\\UCUNI\\OneDrive\\Unity\\ML-Agents\\Aimbot-PPO\\Aimbot-PPO-Python\\aimBotEnv.py in step(self, actions, behaviorName, trackedAgent)\n     86     # take action to env\n     87     self.env.set_actions(behavior_name=behaviorName, action=thisActionTuple)\n---> 88     self.env.step()\n     89     # get nextState & reward & done after this action\n     90     nextState, reward, done, loadDir, saveNow = self.getSteps(behaviorName, trackedAgent)\n",
"c:\\Users\\UCUNI\\AppData\\Local\\Programs\\Python\\Python39\\lib\\site-packages\\mlagents_envs\\timers.py in wrapped(*args, **kwargs)\n    303     def wrapped(*args, **kwargs):\n    304         with hierarchical_timer(func.__qualname__):\n--> 305             return func(*args, **kwargs)\n    306 \n    307     return wrapped  # type: ignore\n",
"c:\\Users\\UCUNI\\AppData\\Local\\Programs\\Python\\Python39\\lib\\site-packages\\mlagents_envs\\environment.py in step(self)\n    333         outputs = self._communicator.exchange(step_input, self._poll_process)\n    334         if outputs is None:\n--> 335             raise UnityCommunicatorStoppedException(\"Communicator has exited.\")\n    336         self._update_behavior_specs(outputs)\n    337         rl_output = outputs.rl_output\n",
"UnityCommunicatorStoppedException: Communicator has exited."
@@ -50,13 +570,26 @@
}
],
"source": [
"done = False\n",
"env.reset()\n",
"demoAct = HumanActions(mouseDiscount=MOUSEDISCOUNT)\n",
"gailExpertDataDir = \"GAIL-Expert-Data/\"\n",
"state, _, _, _, _ = env.reset()\n",
"\n",
"for ep in range(MAX_EP):\n",
"    print(\"EP Start\")\n",
"    done = False\n",
"    while not done:\n",
"        actions = demoAct.getHumanActions()\n",
"        env.step(actions=actions)\n"
"        nextState, _, done, _, _ = env.step(actions=actions)\n",
"        demoMem.saveMems(state=state, actorProb=None, action=actions, reward=None, done=None)\n",
"        state = nextState\n",
"    nowMemNum = demoMem.memNum\n",
"    saveSteps = 500\n",
"    lastMemCheckPoint = 0\n",
"    if nowMemNum / saveSteps >= lastMemCheckPoint + 1:\n",
"        lastMemCheckPoint += 1\n",
"        print(\"nowMemNum\", nowMemNum)\n",
"        print(\"lastMemCheckPoint\", lastMemCheckPoint)\n",
"        demoMem.saveMemtoFile(gailExpertDataDir)\n",
"        print(\"mem_saved\")\n"
]
}
],
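
One detail worth noting in the cell above: saveSteps and lastMemCheckPoint are re-initialized on every episode, so the checkpoint counter never advances past 1 and, once memNum reaches 500, the whole buffer is re-saved after every episode — exactly what the repeated "lastMemCheckPoint 1" lines in the output show. Hoisting the two variables above the episode loop would give the intended save-every-saveSteps behaviour; a sketch of that fix:

saveSteps = 500
lastMemCheckPoint = 0  # persists across episodes
for ep in range(MAX_EP):
    print("EP Start")
    done = False
    while not done:
        actions = demoAct.getHumanActions()
        nextState, _, done, _, _ = env.step(actions=actions)
        demoMem.saveMems(state=state, actorProb=None, action=actions, reward=None, done=None)
        state = nextState
    # now saves once per saveSteps new memories, not once per episode
    if demoMem.memNum / saveSteps >= lastMemCheckPoint + 1:
        lastMemCheckPoint += 1
        demoMem.saveMemtoFile(gailExpertDataDir)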
170
Aimbot-PPO-Python/GAIL.py
Normal file
@@ -0,0 +1,170 @@
import tensorflow as tf
import numpy as np
from numpy import ndarray

from PPO import PPO
from tensorflow import keras
from tensorflow.keras import layers
from tensorflow.keras import optimizers

from GAILConfig import GAILConfig

EPS = 1e-8


class GAIL(object):
    def __init__(
        self,
        stateSize: int,
        disActShape: list,
        conActSize: int,
        conActRange: float,
        gailConfig: GAILConfig,
    ):
        self.stateSize = stateSize
        self.disActShape = disActShape
        self.disActSize = len(disActShape)
        self.conActSize = conActSize
        self.conActRange = conActRange

        self.totalActSize = self.disActSize + conActSize
        self.discrimInputSize = stateSize + self.totalActSize
        self.discriminatorNNShape = gailConfig.discrimNNShape
        self.discrimLR = gailConfig.discrimLR
        self.discrimTrainEpochs = gailConfig.discrimTrainEpochs
        self.ppoConfig = gailConfig.ppoConfig

        self.ppo = PPO(stateSize, disActShape, conActSize, conActRange, self.ppoConfig)
        self.discriminator = self.buildDiscriminatorNet(True)

    def buildDiscriminatorNet(self, compileModel: bool):
        # -----------Input Layers-----------
        stateInput = layers.Input(shape=(self.discrimInputSize,), name="stateInput")

        # -------Intermediate layers--------
        interLayers = []
        interLayersIndex = 0
        for neuralUnit in self.discriminatorNNShape:
            thisLayerName = "dense" + str(interLayersIndex)
            if interLayersIndex == 0:
                interLayers.append(
                    layers.Dense(neuralUnit, activation="relu", name=thisLayerName)(stateInput)
                )
            else:
                interLayers.append(
                    layers.Dense(neuralUnit, activation="relu", name=thisLayerName)(interLayers[-1])
                )
            interLayersIndex += 1

        # ----------Output Layers-----------
        output = layers.Dense(1, activation="sigmoid")(interLayers[-1])

        # ----------Model Compile-----------
        model = keras.Model(inputs=stateInput, outputs=output)
        if compileModel:
            criticOPT = optimizers.Adam(learning_rate=self.discrimLR)
            model.compile(optimizer=criticOPT, loss=self.discrimLoss())
        return model

    def discrimLoss(self):
        def loss(y_true, y_pred):
            """discriminator loss function

            Args:
                y_true (tf.constant): demo trajectory
                y_pred (tf.constant): agent trajectory predict value

            Returns:
                tf.constant: discriminator loss (agent term + demo term)
            """
            demoP = self.discriminator(y_true)
            agentLoss = tf.negative(tf.reduce_mean(tf.math.log(1.0 - y_pred + EPS)))
            demoLoss = tf.negative(tf.reduce_mean(tf.math.log(demoP + EPS)))
            loss = agentLoss + demoLoss
            return loss

        return loss

    def inference(self, states: ndarray, actions: ndarray):
        """discriminator predict result

        Args:
            states (ndarray): states
            actions (ndarray): actions

        Returns:
            tf.constant: discriminator predict result
        """
        # check dimension
        if states.ndim != 2:
            stateNum = int(len(states) / self.stateSize)
            states = states.reshape([stateNum, self.stateSize])
        if actions.ndim != 2:
            actionsNum = int(len(actions) / self.totalActSize)
            actions = actions.reshape([actionsNum, self.totalActSize])

        thisTrajectory = tf.concat([states, actions], axis=1)
        discrimPredict = self.discriminator(thisTrajectory)
        return discrimPredict

    def discriminatorACC(
        self, demoStates: ndarray, demoActions: ndarray, agentStates: ndarray, agentActions: ndarray
    ):
        demoAcc = np.mean(self.inference(demoStates, demoActions))
        agentAcc = np.mean(self.inference(agentStates, agentActions))
        return demoAcc, agentAcc

    def trainDiscriminator(
        self,
        demoStates: ndarray,
        demoActions: ndarray,
        agentStates: ndarray,
        agentActions: ndarray,
        epochs: int = None,
    ):
        """train Discriminator

        Args:
            demoStates (ndarray): expert states
            demoActions (ndarray): expert actions
            agentStates (ndarray): agent PPO generated states
            agentActions (ndarray): agent PPO generated actions
            epochs (int): number of training epochs

        Returns:
            tuple: (losses list, demo accuracy, agent accuracy)
        """
        if epochs is None:
            epochs = self.discrimTrainEpochs
        demoTrajectory = tf.concat([demoStates, demoActions], axis=1)
        agentTrajectory = tf.concat([agentStates, agentActions], axis=1)
        his = self.discriminator.fit(x=agentTrajectory, y=demoTrajectory, epochs=epochs, verbose=0)

        demoAcc = np.mean(self.inference(demoStates, demoActions))
        agentAcc = np.mean(self.inference(agentStates, agentActions))
        return his.history["loss"], demoAcc, 1 - agentAcc

    def getActions(self, state: ndarray):
        actions, predictResult = self.ppo.chooseAction(state)
        return actions, predictResult

    def trainPPO(
        self,
        states: ndarray,
        oldActorResult: ndarray,
        actions: ndarray,
        newRewards: ndarray,
        dones: ndarray,
        nextState: ndarray,
        epochs: int = None,
    ):
        criticV = self.ppo.getCriticV(states)
        discountedR = self.ppo.discountReward(nextState, criticV, dones, newRewards)
        advantage = self.ppo.getGAE(discountedR, criticV)
        criticLosses = self.ppo.trainCritic(states, discountedR, epochs)
        actorLosses = self.ppo.trainActor(states, oldActorResult, actions, advantage, epochs)
        return actorLosses, criticLosses

    def generateAction(self, states: ndarray):
        act, actorP = self.ppo.chooseAction(states)
        return act, actorP
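
A minimal sketch of driving the discriminator side of this class (shapes and sizes here are assumptions; the state and action arrays must have matching row counts because trainDiscriminator concatenates them along axis 1):

import numpy as np
from GAIL import GAIL
from GAILConfig import GAILConfig

STATE_SIZE = 90  # assumed observation size
gail = GAIL(STATE_SIZE, [3, 3, 2], 1, 10.0, GAILConfig())

batch = 64  # assumed; demo and agent batches must be the same size
demoS = np.random.rand(batch, STATE_SIZE)
demoA = np.random.rand(batch, gail.totalActSize)
agentS = np.random.rand(batch, STATE_SIZE)
agentA = np.random.rand(batch, gail.totalActSize)

losses, demoAcc, agentAcc = gail.trainDiscriminator(demoS, demoA, agentS, agentA)

# D(s, a) can then be turned into a surrogate reward for trainPPO,
# e.g. something like -log(1 - D) per the usual GAIL convention
d = gail.inference(agentS, agentA)

Note the unusual fit() call: the Keras target y is repurposed to carry the demo trajectory batch, which the loss closure feeds back through the discriminator itself; that design is why both batches must be equally long.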
24
Aimbot-PPO-Python/GAILConfig.py
Normal file
@@ -0,0 +1,24 @@
import datetime
from typing import NamedTuple

from PPOConfig import PPOConfig


class GAILConfig(NamedTuple):
    discrimNNShape: list = [128, 64]
    discrimLR: float = 1e-3
    discrimTrainEpochs: int = 8
    discrimSaveDir: str = "GAIL-Model/" + datetime.datetime.now().strftime("%m%d-%H%M") + "/"

    ppoConfig: PPOConfig = PPOConfig(
        NNShape=[128, 64],
        actorLR=2e-3,
        criticLR=2e-3,
        gamma=0.99,
        lmbda=0.95,
        clipRange=0.20,
        entropyWeight=1e-2,
        trainEpochs=8,
        saveDir="GAIL-Model/" + datetime.datetime.now().strftime("%m%d-%H%M") + "/",
        loadModelDir=None,
    )
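
Because GAILConfig is a NamedTuple, single fields can be overridden without restating the rest (the values here are illustrative):

from GAILConfig import GAILConfig

config = GAILConfig()._replace(discrimLR=5e-4, discrimTrainEpochs=4)
print(config.discrimLR)  # 0.0005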
175
Aimbot-PPO-Python/GAILMem.py
Normal file
@@ -0,0 +1,175 @@
import os
import random
import numpy as np


class GAILMem(object):
    def __init__(self):
        self.states = []
        self.actorProbs = []
        self.actions = []
        self.rewards = []
        self.dones = []
        self.memNum = 0
        print("√√√√√Buffer Initialized Success√√√√√")

    def clearMem(self):
        """clear memories"""
        self.states = []
        self.actorProbs = []
        self.actions = []
        self.rewards = []
        self.dones = []
        self.memNum = 0

    def saveMemtoFile(self, dir: str):
        """save memories as ndarrays to an npz file

        Args:
            dir (str): save directory, like "GAIL-Expert-Data/", ending with "/"
        """
        statesNP = np.asarray(self.states)
        actorProbsNP = np.asarray(self.actorProbs)
        actionsNP = np.asarray(self.actions)
        rewardsNP = np.asarray(self.rewards)
        donesNP = np.asarray(self.dones)
        thisSaveDir = dir + "pack-" + str(self.memNum)
        try:
            np.savez(
                thisSaveDir,
                states=statesNP,
                actorProbs=actorProbsNP,
                actions=actionsNP,
                rewards=rewardsNP,
                dones=donesNP,
            )
        except FileNotFoundError:
            os.mkdir(dir)
            np.savez(
                thisSaveDir,
                states=statesNP,
                actorProbs=actorProbsNP,
                actions=actionsNP,
                rewards=rewardsNP,
                dones=donesNP,
            )

    def loadMemFile(self, dir: str):
        """load memories from an npz file

        Args:
            dir (str): file path
        """
        self.clearMem()
        memFile = np.load(dir)
        self.states = memFile["states"].tolist()
        self.actorProbs = memFile["actorProbs"].tolist()
        self.actions = memFile["actions"].tolist()
        self.rewards = memFile["rewards"].tolist()
        self.dones = memFile["dones"].tolist()
        self.memNum = len(self.states)

    def getRandomSample(self, sampleNum: int = 0):
        """get a random unique sample set.

        Args:
            sampleNum (int, optional): sample number; 0 returns all samples. Defaults to 0.

        Returns:
            tuple: (states, actorProbs, actions, rewards, dones)
        """
        if sampleNum == 0:
            return (
                self.getStates(),
                self.getActorProbs(),
                self.getActions(),
                self.getRewards(),
                self.getDones(),
            )
        else:
            randIndex = random.sample(range(0, self.memNum), sampleNum)
            return (
                self.standDims(np.asarray(self.states)[randIndex]),
                self.standDims(np.asarray(self.actorProbs)[randIndex]),
                self.standDims(np.asarray(self.actions)[randIndex]),
                self.standDims(np.asarray(self.rewards)[randIndex]),
                self.standDims(np.asarray(self.dones)[randIndex]),
            )

    def getStates(self):
        """get all States data as ndarray

        Returns:
            ndarray: ndarray type States data
        """
        return self.standDims(np.asarray(self.states))

    def getActorProbs(self):
        """get all ActorProbs data as ndarray

        Returns:
            ndarray: ndarray type ActorProbs data
        """
        return self.standDims(np.asarray(self.actorProbs))

    def getActions(self):
        """get all Actions data as ndarray

        Returns:
            ndarray: ndarray type Actions data
        """
        return self.standDims(np.asarray(self.actions))

    def getRewards(self):
        """get all Rewards data as ndarray

        Returns:
            ndarray: ndarray type Rewards data
        """
        return self.standDims(np.asarray(self.rewards))

    def getDones(self):
        """get all Dones data as ndarray

        Returns:
            ndarray: ndarray type Dones data
        """
        return self.standDims(np.asarray(self.dones))

    def standDims(self, data):
        """standardize data's dimensions

        Args:
            data (list): data list

        Returns:
            ndarray: ndarray type data
        """
        if np.ndim(data) > 2:
            return np.squeeze(data, axis=1)
        elif np.ndim(data) < 2:
            return np.expand_dims(data, axis=1)
        else:
            return np.asarray(data)

    def saveMems(self, state, actorProb, action, reward, done):
        """save one step's memories

        Args:
            state (_type_): states
            actorProb (_type_): actor predict result
            action (_type_): actor chosen action
            reward (_type_): reward
            done (_type_): done flag
        """
        self.states.append(state)
        self.actorProbs.append(actorProb)
        self.actions.append(action)
        self.rewards.append(reward)
        self.dones.append(done)
        self.memNum += 1
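
A short round-trip sketch for the buffer (the filename follows the "pack-" + memNum convention of saveMemtoFile plus the .npz suffix that np.savez appends; the actual number depends on when the save ran, and for human demos the actorProbs/rewards/dones slots were recorded as None):

from GAILMem import GAILMem

mem = GAILMem()
mem.loadMemFile("GAIL-Expert-Data/pack-500.npz")  # assumed pack file
states, actorProbs, actions, rewards, dones = mem.getRandomSample(sampleNum=64)
print(states.shape)  # (64, stateSize) after standDims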
@@ -1,5 +1,6 @@
import keyboard
import mouse
import math


class HumanActions:
@@ -14,11 +15,13 @@ class HumanActions:
        self.screenW = screenW
        self.screenH = screenH
        self.MOUSEDISCOUNT = mouseDiscount
        self.mouseSmooth = 5
        self.mouseMax = 10

    def getHumanActions(self):
        x, _ = mouse.get_position()
        xMovement = (x - self.screenW / 2) / self.MOUSEDISCOUNT

        xMovement = self.smoothMouseMovement(xMovement)
        ws = 0
        ad = 0
        click = 0
@@ -42,10 +45,14 @@ class HumanActions:
        elif keyboard.is_pressed("s+a"):
            ws = 2
            ad = 2
        if mouse.is_pressed(button="left"):
        if keyboard.is_pressed("0"):
            click = 1

        actions = [ws, ad, click, [xMovement]]

        mouse.move(self.screenW / 2, self.screenH / 2)
        return actions

    def smoothMouseMovement(self, x: float):
        out = (1 / (1 + math.exp(-x / self.mouseSmooth)) - 1 / 2) * self.mouseMax * 2
        return out
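
smoothMouseMovement is a scaled logistic, out = (σ(x / mouseSmooth) − 1/2) · 2 · mouseMax: odd around zero, approximately linear for small movements (slope mouseMax / (2 · mouseSmooth) = 1 with the values above), and saturating at ±mouseMax. A standalone check of those properties:

import math

def smooth(x, mouseSmooth=5, mouseMax=10):
    # scaled logistic: ~linear near 0, saturates at +/- mouseMax
    return (1 / (1 + math.exp(-x / mouseSmooth)) - 1 / 2) * mouseMax * 2

print(smooth(0))            # 0.0
print(round(smooth(5), 2))  # ~4.62, already sub-linear
print(smooth(1000))         # ~10.0, capped at mouseMax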
@@ -6,7 +6,14 @@ from numpy import ndarray


class makeEnv(object):
    def __init__(self, envPath, workerID, basePort):
    def __init__(
        self,
        envPath: str,
        workerID: int = 1,
        basePort: int = 100,
        stackSize: int = 1,
        stackIntercal: int = 0,
    ):
        self.env = UnityEnvironment(
            file_name=envPath,
            seed=1,
@@ -25,24 +32,31 @@ class makeEnv(object):
        self.OBSERVATION_SPECS = self.SPEC.observation_specs[0]  # observation spec
        self.ACTION_SPEC = self.SPEC.action_spec  # action specs

        self.DISCRETE_SIZE = self.ACTION_SPEC.discrete_size  # size of discrete actions
        self.DISCRETE_SIZE = self.ACTION_SPEC.discrete_size
        self.DISCRETE_SHAPE = list(self.ACTION_SPEC.discrete_branches)
        self.CONTINUOUS_SIZE = self.ACTION_SPEC.continuous_size  # size of continuous actions
        self.STATE_SIZE = self.OBSERVATION_SPECS.shape[0] - self.LOAD_DIR_SIZE_IN_STATE  # number of environment observation values
        self.CONTINUOUS_SIZE = self.ACTION_SPEC.continuous_size
        self.SINGLE_STATE_SIZE = self.OBSERVATION_SPECS.shape[0] - self.LOAD_DIR_SIZE_IN_STATE
        self.STATE_SIZE = self.SINGLE_STATE_SIZE * stackSize

        # stacked State
        self.STACK_SIZE = stackSize
        self.STATE_BUFFER_SIZE = stackSize + ((stackSize - 1) * stackIntercal)
        self.STACK_INDEX = list(range(0, self.STATE_BUFFER_SIZE, stackIntercal + 1))
        self.statesBuffer = np.array([[0.0] * self.SINGLE_STATE_SIZE] * self.STATE_BUFFER_SIZE)
        print("√√√√√Enviroment Initialized Success√√√√√")

    def step(
        self,
        actions: list,
        behaviorName: ndarray = None,
        trackedAgent: ndarray = None,
        trackedAgent: int = None,
    ):
        """convert the actions list to an ActionTuple, then send it to the environment

        Args:
            actions (list): PPO chooseAction output action list
            behaviorName (ndarray, optional): behaviorName. Defaults to None.
            trackedAgent (ndarray, optional): trackedAgentID. Defaults to None.
            trackedAgent (int, optional): trackedAgentID. Defaults to None.

        Returns:
            ndarray: nextState, reward, done, loadDir, saveNow
@@ -54,13 +68,13 @@ class makeEnv(object):
            discreteActions = np.asarray([[0]])
        else:
            # create discrete action from actions list
            discreteActions = np.asanyarray([actions[0:self.DISCRETE_SIZE]])
            discreteActions = np.asanyarray([actions[0 : self.DISCRETE_SIZE]])
        if self.CONTINUOUS_SIZE == 0:
            # create empty continuous action
            continuousActions = np.asanyarray([[0.0]])
        else:
            # create continuous actions from actions list
            continuousActions = np.asanyarray(actions[self.DISCRETE_SIZE:])
            continuousActions = np.asanyarray(actions[self.DISCRETE_SIZE :])

        if behaviorName is None:
            behaviorName = self.BEHA_NAME
@@ -98,21 +112,28 @@ class makeEnv(object):

        if trackedAgent in decisionSteps:  # if the episode has not ended, the state is stored in decision_steps
            nextState = decisionSteps[trackedAgent].obs[0]
            nextState = np.reshape(nextState, [1, self.STATE_SIZE + self.LOAD_DIR_SIZE_IN_STATE])
            saveNow = nextState[0][-1]
            loadDir = nextState[0][-3:-1]
            nextState = nextState[0][:-3]
            nextState = np.reshape(
                nextState, [self.SINGLE_STATE_SIZE + self.LOAD_DIR_SIZE_IN_STATE]
            )
            saveNow = nextState[-1]
            loadDir = nextState[-3:-1]
            nextState = nextState[:-3]
            reward = decisionSteps[trackedAgent].reward
            done = False
        if trackedAgent in terminalSteps:  # if the episode has ended, the state is stored in terminal_steps
            nextState = terminalSteps[trackedAgent].obs[0]
            nextState = np.reshape(nextState, [1, self.STATE_SIZE + self.LOAD_DIR_SIZE_IN_STATE])
            saveNow = nextState[0][-1]
            loadDir = nextState[0][-3:-1]
            nextState = nextState[0][:-3]
            nextState = np.reshape(
                nextState, [self.SINGLE_STATE_SIZE + self.LOAD_DIR_SIZE_IN_STATE]
            )
            saveNow = nextState[-1]
            loadDir = nextState[-3:-1]
            nextState = nextState[:-3]
            reward = terminalSteps[trackedAgent].reward
            done = True
        return nextState, reward, done, loadDir, saveNow

        # stack state
        stackedStates = self.stackStates(nextState)
        return stackedStates, reward, done, loadDir, saveNow

    def reset(self):
        """reset the environment and get observations
@@ -120,11 +141,21 @@
        Returns:
            ndarray: nextState, reward, done, loadDir, saveNow
        """
        # reset buffer
        self.statesBuffer = np.array([[0.0] * self.SINGLE_STATE_SIZE] * self.STATE_BUFFER_SIZE)
        # reset env
        self.env.reset()
        nextState, reward, done, loadDir, saveNow = self.getSteps()
        return nextState, reward, done, loadDir, saveNow

    def stackStates(self, state):
        # shift the buffer and append the newest state
        self.statesBuffer[0:-1] = self.statesBuffer[1:]
        self.statesBuffer[-1] = state

        # return stacked states
        return self.statesBuffer[self.STACK_INDEX]

    def render(self):
        """render environment
        """
        """render environment"""
        self.env.render()
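
With the notebook's settings (stackSize=3, stackIntercal=29), the buffer keeps STATE_BUFFER_SIZE = 3 + 2 · 29 = 61 single states and STACK_INDEX = [0, 30, 60], so each observation handed to the agent is the newest state plus snapshots from 30 and 60 steps earlier. The arithmetic, checked standalone:

stackSize, stackIntercal = 3, 29
bufferSize = stackSize + (stackSize - 1) * stackIntercal
stackIndex = list(range(0, bufferSize, stackIntercal + 1))
print(bufferSize)  # 61
print(stackIndex)  # [0, 30, 60]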
@@ -361,17 +361,102 @@
},
{
"cell_type": "code",
"execution_count": null,
"execution_count": 21,
"metadata": {},
"outputs": [],
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"3\n",
"deque([[0.0, 0.0, 0.0, 0.0, 0.0], [0.0, 0.0, 0.0, 0.0, 0.0], [1, 1, 1, 1, 1]], maxlen=3)\n",
"3\n",
"deque([[0.0, 0.0, 0.0, 0.0, 0.0], [1, 1, 1, 1, 1], [2, 2, 2, 2, 2]], maxlen=3)\n",
"3\n",
"deque([[1, 1, 1, 1, 1], [2, 2, 2, 2, 2], [3, 3, 3, 3, 3]], maxlen=3)\n",
"3\n",
"deque([[2, 2, 2, 2, 2], [3, 3, 3, 3, 3], [4, 4, 4, 4, 4]], maxlen=3)\n",
"3\n",
"deque([[3, 3, 3, 3, 3], [4, 4, 4, 4, 4], [5, 5, 5, 5, 5]], maxlen=3)\n",
"3\n",
"deque([[4, 4, 4, 4, 4], [5, 5, 5, 5, 5], [6, 6, 6, 6, 6]], maxlen=3)\n",
"3\n",
"deque([[5, 5, 5, 5, 5], [6, 6, 6, 6, 6], [7, 7, 7, 7, 7]], maxlen=3)\n",
"3\n",
"deque([[6, 6, 6, 6, 6], [7, 7, 7, 7, 7], [8, 8, 8, 8, 8]], maxlen=3)\n",
"3\n",
"deque([[7, 7, 7, 7, 7], [8, 8, 8, 8, 8], [9, 9, 9, 9, 9]], maxlen=3)\n"
]
}
],
"source": [
"import keyboard\n",
"from collections import deque\n",
"import numpy as np\n",
"\n",
"while True:\n",
"    if keyboard.is_pressed(\"w\"):\n",
"        print(\"w\")\n",
"    elif keyboard.is_pressed(\"s\"):\n",
"        print(\"s\")"
"maxBuffer = 3\n",
"stateSize = 5\n",
"\n",
"aa = deque([[0.0]*stateSize],maxlen=maxBuffer)\n",
"\n",
"def ss(s):\n",
"    aa.append(s)\n",
"    if len(aa) < maxBuffer:\n",
"        for i in range(maxBuffer - len(aa)):\n",
"            aa.appendleft([0.0] * stateSize)\n",
"\n",
"for i in range(1,10):\n",
"    ss([i,i,i,i,i])\n",
"    print(len(aa))\n",
"    print(aa)\n",
"'''\n",
"3\n",
"deque([[0.0, 0.0, 0.0, 0.0, 0.0], [0.0, 0.0, 0.0, 0.0, 0.0], [1, 1, 1, 1, 1]], maxlen=3)\n",
"3\n",
"deque([[0.0, 0.0, 0.0, 0.0, 0.0], [1, 1, 1, 1, 1], [2, 2, 2, 2, 2]], maxlen=3)\n",
"3\n",
"deque([[1, 1, 1, 1, 1], [2, 2, 2, 2, 2], [3, 3, 3, 3, 3]], maxlen=3)\n",
"3\n",
"deque([[2, 2, 2, 2, 2], [3, 3, 3, 3, 3], [4, 4, 4, 4, 4]], maxlen=3)\n",
"3\n",
"deque([[3, 3, 3, 3, 3], [4, 4, 4, 4, 4], [5, 5, 5, 5, 5]], maxlen=3)\n",
"3\n",
"deque([[4, 4, 4, 4, 4], [5, 5, 5, 5, 5], [6, 6, 6, 6, 6]], maxlen=3)\n",
"3\n",
"deque([[5, 5, 5, 5, 5], [6, 6, 6, 6, 6], [7, 7, 7, 7, 7]], maxlen=3)\n",
"3\n",
"deque([[6, 6, 6, 6, 6], [7, 7, 7, 7, 7], [8, 8, 8, 8, 8]], maxlen=3)\n",
"3\n",
"deque([[7, 7, 7, 7, 7], [8, 8, 8, 8, 8], [9, 9, 9, 9, 9]], maxlen=3)'''"
]
},
{
"cell_type": "code",
"execution_count": 35,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"1\n",
"[0]\n"
]
}
],
"source": [
"from collections import deque\n",
"import numpy as np\n",
"\n",
"ss = 1\n",
"si = 0\n",
"buffersize = ss + ((ss-1)*si)\n",
"print(buffersize)\n",
"stackedStates = deque([[0.0] * 10]*6, maxlen=3)\n",
"stackedStates.append([1.0]*10)\n",
"ssnp = stackedStates\n",
"\n",
"aa = list(range(0,buffersize,si+1))\n",
"print(aa)"
]
}
],