GAIL class, human action record feature added

add GAIL, GAILMem and GAILConfig classes.
add HumanAction record to save expert data.
add stackState feature to stack multiple states so the agent knows what happened before.
This commit is contained in:
Koha9 2022-10-14 19:05:02 +09:00
parent ae8a1ba8e2
commit 2a498f18f6
8 changed files with 1066 additions and 39 deletions

4
.gitignore vendored

@ -74,8 +74,10 @@ crashlytics-build.properties
# Python Folder
/Aimbot-PPO-Python/.vscode/
/Aimbot-PPO-Python/.mypy_cache/
/Aimbot-PPO-Python/__pycache__/
/Aimbot-PPO-Python/Backup/
/Aimbot-PPO-Python/Build-MultiScene-WithLoad/
/Aimbot-PPO-Python/Build-CloseEnemyCut/
/Aimbot-PPO-Python/PPO-Model/
/Aimbot-PPO-Python/PPO-Model/
/Aimbot-PPO-Python/GAIL-Expert-Data/

View File

@ -9,13 +9,15 @@
"name": "stdout",
"output_type": "stream",
"text": [
"√√√√√Enviroment Initialized Success√√√√√\n"
"√√√√√Enviroment Initialized Success√√√√√\n",
"√√√√√Buffer Initialized Success√√√√√\n"
]
}
],
"source": [
"import time\n",
"import aimBotEnv\n",
"from GAILMem import GAILMem\n",
"from HumanAction import HumanActions\n",
"\n",
"# Env\n",
@ -23,10 +25,21 @@
"WORKER_ID = 1\n",
"BASE_PORT = 200\n",
"\n",
"MOUSEDISCOUNT = 8.0\n",
"# ENV Para\n",
"MOUSEDISCOUNT = 20.0\n",
"MAX_EP = 10000000\n",
"STACKSTATESIZE = 3\n",
"STACKINTERCE = 29\n",
"\n",
"env = aimBotEnv.makeEnv(envPath=ENV_PATH, workerID=WORKER_ID, basePort=BASE_PORT)\n"
"env = aimBotEnv.makeEnv(\n",
" envPath=ENV_PATH,\n",
" workerID=WORKER_ID,\n",
" basePort=BASE_PORT,\n",
" stackSize=STACKSTATESIZE,\n",
" stackIntercal=STACKINTERCE,\n",
")\n",
"demoMem = GAILMem()\n",
"demoAct = HumanActions(mouseDiscount=MOUSEDISCOUNT)\n"
]
},
{
@ -34,6 +47,513 @@
"execution_count": 2,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"EP Start\n",
"EP Start\n",
"EP Start\n",
"nowMemNum 743\n",
"lastMemCheckPoint 1\n",
"mem_saved\n",
"EP Start\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"c:\\Users\\UCUNI\\OneDrive\\Unity\\ML-Agents\\Aimbot-PPO\\Aimbot-PPO-Python\\GAILMem.py:33: VisibleDeprecationWarning: Creating an ndarray from ragged nested sequences (which is a list-or-tuple of lists-or-tuples-or ndarrays with different lengths or shapes) is deprecated. If you meant to do this, you must specify 'dtype=object' when creating the ndarray.\n",
" actionsNP = np.asarray(self.actions)\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"nowMemNum 993\n",
"lastMemCheckPoint 1\n",
"mem_saved\n",
"EP Start\n",
"nowMemNum 1199\n",
"lastMemCheckPoint 1\n",
"mem_saved\n",
"EP Start\n",
"nowMemNum 1426\n",
"lastMemCheckPoint 1\n",
"mem_saved\n",
"EP Start\n",
"nowMemNum 1671\n",
"lastMemCheckPoint 1\n",
"mem_saved\n",
"EP Start\n",
"nowMemNum 1890\n",
"lastMemCheckPoint 1\n",
"mem_saved\n",
"EP Start\n",
"nowMemNum 2097\n",
"lastMemCheckPoint 1\n",
"mem_saved\n",
"EP Start\n",
"nowMemNum 2307\n",
"lastMemCheckPoint 1\n",
"mem_saved\n",
"EP Start\n",
"nowMemNum 2510\n",
"lastMemCheckPoint 1\n",
"mem_saved\n",
"EP Start\n",
"nowMemNum 2710\n",
"lastMemCheckPoint 1\n",
"mem_saved\n",
"EP Start\n",
"nowMemNum 2889\n",
"lastMemCheckPoint 1\n",
"mem_saved\n",
"EP Start\n",
"nowMemNum 3079\n",
"lastMemCheckPoint 1\n",
"mem_saved\n",
"EP Start\n",
"nowMemNum 3263\n",
"lastMemCheckPoint 1\n",
"mem_saved\n",
"EP Start\n",
"nowMemNum 3506\n",
"lastMemCheckPoint 1\n",
"mem_saved\n",
"EP Start\n",
"nowMemNum 3764\n",
"lastMemCheckPoint 1\n",
"mem_saved\n",
"EP Start\n",
"nowMemNum 3982\n",
"lastMemCheckPoint 1\n",
"mem_saved\n",
"EP Start\n",
"nowMemNum 4155\n",
"lastMemCheckPoint 1\n",
"mem_saved\n",
"EP Start\n",
"nowMemNum 4338\n",
"lastMemCheckPoint 1\n",
"mem_saved\n",
"EP Start\n",
"nowMemNum 4530\n",
"lastMemCheckPoint 1\n",
"mem_saved\n",
"EP Start\n",
"nowMemNum 4749\n",
"lastMemCheckPoint 1\n",
"mem_saved\n",
"EP Start\n",
"nowMemNum 4979\n",
"lastMemCheckPoint 1\n",
"mem_saved\n",
"EP Start\n",
"nowMemNum 5159\n",
"lastMemCheckPoint 1\n",
"mem_saved\n",
"EP Start\n",
"nowMemNum 5358\n",
"lastMemCheckPoint 1\n",
"mem_saved\n",
"EP Start\n",
"nowMemNum 5641\n",
"lastMemCheckPoint 1\n",
"mem_saved\n",
"EP Start\n",
"nowMemNum 5887\n",
"lastMemCheckPoint 1\n",
"mem_saved\n",
"EP Start\n",
"nowMemNum 6085\n",
"lastMemCheckPoint 1\n",
"mem_saved\n",
"EP Start\n",
"nowMemNum 6312\n",
"lastMemCheckPoint 1\n",
"mem_saved\n",
"EP Start\n",
"nowMemNum 6471\n",
"lastMemCheckPoint 1\n",
"mem_saved\n",
"EP Start\n",
"nowMemNum 6691\n",
"lastMemCheckPoint 1\n",
"mem_saved\n",
"EP Start\n",
"nowMemNum 6885\n",
"lastMemCheckPoint 1\n",
"mem_saved\n",
"EP Start\n",
"nowMemNum 7086\n",
"lastMemCheckPoint 1\n",
"mem_saved\n",
"EP Start\n",
"nowMemNum 7248\n",
"lastMemCheckPoint 1\n",
"mem_saved\n",
"EP Start\n",
"nowMemNum 7437\n",
"lastMemCheckPoint 1\n",
"mem_saved\n",
"EP Start\n",
"nowMemNum 7608\n",
"lastMemCheckPoint 1\n",
"mem_saved\n",
"EP Start\n",
"nowMemNum 7788\n",
"lastMemCheckPoint 1\n",
"mem_saved\n",
"EP Start\n",
"nowMemNum 8020\n",
"lastMemCheckPoint 1\n",
"mem_saved\n",
"EP Start\n",
"nowMemNum 8193\n",
"lastMemCheckPoint 1\n",
"mem_saved\n",
"EP Start\n",
"nowMemNum 8447\n",
"lastMemCheckPoint 1\n",
"mem_saved\n",
"EP Start\n",
"nowMemNum 8675\n",
"lastMemCheckPoint 1\n",
"mem_saved\n",
"EP Start\n",
"nowMemNum 8869\n",
"lastMemCheckPoint 1\n",
"mem_saved\n",
"EP Start\n",
"nowMemNum 9046\n",
"lastMemCheckPoint 1\n",
"mem_saved\n",
"EP Start\n",
"nowMemNum 9260\n",
"lastMemCheckPoint 1\n",
"mem_saved\n",
"EP Start\n",
"nowMemNum 9469\n",
"lastMemCheckPoint 1\n",
"mem_saved\n",
"EP Start\n",
"nowMemNum 9633\n",
"lastMemCheckPoint 1\n",
"mem_saved\n",
"EP Start\n",
"nowMemNum 9802\n",
"lastMemCheckPoint 1\n",
"mem_saved\n",
"EP Start\n",
"nowMemNum 10019\n",
"lastMemCheckPoint 1\n",
"mem_saved\n",
"EP Start\n",
"nowMemNum 10205\n",
"lastMemCheckPoint 1\n",
"mem_saved\n",
"EP Start\n",
"nowMemNum 10387\n",
"lastMemCheckPoint 1\n",
"mem_saved\n",
"EP Start\n",
"nowMemNum 10657\n",
"lastMemCheckPoint 1\n",
"mem_saved\n",
"EP Start\n",
"nowMemNum 10834\n",
"lastMemCheckPoint 1\n",
"mem_saved\n",
"EP Start\n",
"nowMemNum 11071\n",
"lastMemCheckPoint 1\n",
"mem_saved\n",
"EP Start\n",
"nowMemNum 11284\n",
"lastMemCheckPoint 1\n",
"mem_saved\n",
"EP Start\n",
"nowMemNum 11516\n",
"lastMemCheckPoint 1\n",
"mem_saved\n",
"EP Start\n",
"nowMemNum 11735\n",
"lastMemCheckPoint 1\n",
"mem_saved\n",
"EP Start\n",
"nowMemNum 11948\n",
"lastMemCheckPoint 1\n",
"mem_saved\n",
"EP Start\n",
"nowMemNum 12157\n",
"lastMemCheckPoint 1\n",
"mem_saved\n",
"EP Start\n",
"nowMemNum 12330\n",
"lastMemCheckPoint 1\n",
"mem_saved\n",
"EP Start\n",
"nowMemNum 12565\n",
"lastMemCheckPoint 1\n",
"mem_saved\n",
"EP Start\n",
"nowMemNum 12768\n",
"lastMemCheckPoint 1\n",
"mem_saved\n",
"EP Start\n",
"nowMemNum 12944\n",
"lastMemCheckPoint 1\n",
"mem_saved\n",
"EP Start\n",
"nowMemNum 13129\n",
"lastMemCheckPoint 1\n",
"mem_saved\n",
"EP Start\n",
"nowMemNum 13292\n",
"lastMemCheckPoint 1\n",
"mem_saved\n",
"EP Start\n",
"nowMemNum 13590\n",
"lastMemCheckPoint 1\n",
"mem_saved\n",
"EP Start\n",
"nowMemNum 13765\n",
"lastMemCheckPoint 1\n",
"mem_saved\n",
"EP Start\n",
"nowMemNum 13921\n",
"lastMemCheckPoint 1\n",
"mem_saved\n",
"EP Start\n",
"nowMemNum 14083\n",
"lastMemCheckPoint 1\n",
"mem_saved\n",
"EP Start\n",
"nowMemNum 14254\n",
"lastMemCheckPoint 1\n",
"mem_saved\n",
"EP Start\n",
"nowMemNum 14445\n",
"lastMemCheckPoint 1\n",
"mem_saved\n",
"EP Start\n",
"nowMemNum 14662\n",
"lastMemCheckPoint 1\n",
"mem_saved\n",
"EP Start\n",
"nowMemNum 14833\n",
"lastMemCheckPoint 1\n",
"mem_saved\n",
"EP Start\n",
"nowMemNum 15056\n",
"lastMemCheckPoint 1\n",
"mem_saved\n",
"EP Start\n",
"nowMemNum 15258\n",
"lastMemCheckPoint 1\n",
"mem_saved\n",
"EP Start\n",
"nowMemNum 15425\n",
"lastMemCheckPoint 1\n",
"mem_saved\n",
"EP Start\n",
"nowMemNum 15590\n",
"lastMemCheckPoint 1\n",
"mem_saved\n",
"EP Start\n",
"nowMemNum 15829\n",
"lastMemCheckPoint 1\n",
"mem_saved\n",
"EP Start\n",
"nowMemNum 16057\n",
"lastMemCheckPoint 1\n",
"mem_saved\n",
"EP Start\n",
"nowMemNum 16237\n",
"lastMemCheckPoint 1\n",
"mem_saved\n",
"EP Start\n",
"nowMemNum 16411\n",
"lastMemCheckPoint 1\n",
"mem_saved\n",
"EP Start\n",
"nowMemNum 16612\n",
"lastMemCheckPoint 1\n",
"mem_saved\n",
"EP Start\n",
"nowMemNum 16812\n",
"lastMemCheckPoint 1\n",
"mem_saved\n",
"EP Start\n",
"nowMemNum 17001\n",
"lastMemCheckPoint 1\n",
"mem_saved\n",
"EP Start\n",
"nowMemNum 17173\n",
"lastMemCheckPoint 1\n",
"mem_saved\n",
"EP Start\n",
"nowMemNum 17342\n",
"lastMemCheckPoint 1\n",
"mem_saved\n",
"EP Start\n",
"nowMemNum 17515\n",
"lastMemCheckPoint 1\n",
"mem_saved\n",
"EP Start\n",
"nowMemNum 17715\n",
"lastMemCheckPoint 1\n",
"mem_saved\n",
"EP Start\n",
"nowMemNum 17890\n",
"lastMemCheckPoint 1\n",
"mem_saved\n",
"EP Start\n",
"nowMemNum 18072\n",
"lastMemCheckPoint 1\n",
"mem_saved\n",
"EP Start\n",
"nowMemNum 18261\n",
"lastMemCheckPoint 1\n",
"mem_saved\n",
"EP Start\n",
"nowMemNum 18489\n",
"lastMemCheckPoint 1\n",
"mem_saved\n",
"EP Start\n",
"nowMemNum 18701\n",
"lastMemCheckPoint 1\n",
"mem_saved\n",
"EP Start\n",
"nowMemNum 18886\n",
"lastMemCheckPoint 1\n",
"mem_saved\n",
"EP Start\n",
"nowMemNum 19100\n",
"lastMemCheckPoint 1\n",
"mem_saved\n",
"EP Start\n",
"nowMemNum 19318\n",
"lastMemCheckPoint 1\n",
"mem_saved\n",
"EP Start\n",
"nowMemNum 19487\n",
"lastMemCheckPoint 1\n",
"mem_saved\n",
"EP Start\n",
"nowMemNum 19670\n",
"lastMemCheckPoint 1\n",
"mem_saved\n",
"EP Start\n",
"nowMemNum 19881\n",
"lastMemCheckPoint 1\n",
"mem_saved\n",
"EP Start\n",
"nowMemNum 20041\n",
"lastMemCheckPoint 1\n",
"mem_saved\n",
"EP Start\n",
"nowMemNum 20279\n",
"lastMemCheckPoint 1\n",
"mem_saved\n",
"EP Start\n",
"nowMemNum 20491\n",
"lastMemCheckPoint 1\n",
"mem_saved\n",
"EP Start\n",
"nowMemNum 20679\n",
"lastMemCheckPoint 1\n",
"mem_saved\n",
"EP Start\n",
"nowMemNum 20877\n",
"lastMemCheckPoint 1\n",
"mem_saved\n",
"EP Start\n",
"nowMemNum 21070\n",
"lastMemCheckPoint 1\n",
"mem_saved\n",
"EP Start\n",
"nowMemNum 21305\n",
"lastMemCheckPoint 1\n",
"mem_saved\n",
"EP Start\n",
"nowMemNum 21519\n",
"lastMemCheckPoint 1\n",
"mem_saved\n",
"EP Start\n",
"nowMemNum 21760\n",
"lastMemCheckPoint 1\n",
"mem_saved\n",
"EP Start\n",
"nowMemNum 21936\n",
"lastMemCheckPoint 1\n",
"mem_saved\n",
"EP Start\n",
"nowMemNum 22135\n",
"lastMemCheckPoint 1\n",
"mem_saved\n",
"EP Start\n",
"nowMemNum 22304\n",
"lastMemCheckPoint 1\n",
"mem_saved\n",
"EP Start\n",
"nowMemNum 22512\n",
"lastMemCheckPoint 1\n",
"mem_saved\n",
"EP Start\n",
"nowMemNum 22706\n",
"lastMemCheckPoint 1\n",
"mem_saved\n",
"EP Start\n",
"nowMemNum 22882\n",
"lastMemCheckPoint 1\n",
"mem_saved\n",
"EP Start\n",
"nowMemNum 23123\n",
"lastMemCheckPoint 1\n",
"mem_saved\n",
"EP Start\n",
"nowMemNum 23290\n",
"lastMemCheckPoint 1\n",
"mem_saved\n",
"EP Start\n",
"nowMemNum 23453\n",
"lastMemCheckPoint 1\n",
"mem_saved\n",
"EP Start\n",
"nowMemNum 23707\n",
"lastMemCheckPoint 1\n",
"mem_saved\n",
"EP Start\n",
"nowMemNum 23942\n",
"lastMemCheckPoint 1\n",
"mem_saved\n",
"EP Start\n",
"nowMemNum 24153\n",
"lastMemCheckPoint 1\n",
"mem_saved\n",
"EP Start\n",
"nowMemNum 24346\n",
"lastMemCheckPoint 1\n",
"mem_saved\n",
"EP Start\n",
"nowMemNum 24573\n",
"lastMemCheckPoint 1\n",
"mem_saved\n",
"EP Start\n",
"nowMemNum 24757\n",
"lastMemCheckPoint 1\n",
"mem_saved\n",
"EP Start\n",
"nowMemNum 24957\n",
"lastMemCheckPoint 1\n",
"mem_saved\n",
"EP Start\n"
]
},
{
"ename": "UnityCommunicatorStoppedException",
"evalue": "Communicator has exited.",
@ -41,8 +561,8 @@
"traceback": [
"\u001b[1;31m---------------------------------------------------------------------------\u001b[0m",
"\u001b[1;31mUnityCommunicatorStoppedException\u001b[0m Traceback (most recent call last)",
"\u001b[1;32m~\\AppData\\Local\\Temp/ipykernel_37248/645561173.py\u001b[0m in \u001b[0;36m<module>\u001b[1;34m\u001b[0m\n\u001b[0;32m 5\u001b[0m \u001b[1;32mwhile\u001b[0m \u001b[1;32mnot\u001b[0m \u001b[0mdone\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 6\u001b[0m \u001b[0mactions\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mdemoAct\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mgetHumanActions\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m----> 7\u001b[1;33m \u001b[0menv\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mstep\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mactions\u001b[0m\u001b[1;33m=\u001b[0m\u001b[0mactions\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m",
"\u001b[1;32mc:\\Users\\UCUNI\\OneDrive\\Unity\\ML-Agents\\Aimbot-PPO\\Aimbot-PPO-Python\\aimBotEnv.py\u001b[0m in \u001b[0;36mstep\u001b[1;34m(self, actions, behaviorName, trackedAgent)\u001b[0m\n\u001b[0;32m 72\u001b[0m \u001b[1;31m# take action to env\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 73\u001b[0m \u001b[0mself\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0menv\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mset_actions\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mbehavior_name\u001b[0m\u001b[1;33m=\u001b[0m\u001b[0mbehaviorName\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0maction\u001b[0m\u001b[1;33m=\u001b[0m\u001b[0mthisActionTuple\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m---> 74\u001b[1;33m \u001b[0mself\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0menv\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mstep\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m 75\u001b[0m \u001b[1;31m# get nextState & reward & done after this action\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 76\u001b[0m \u001b[0mnextState\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mreward\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mdone\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mloadDir\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0msaveNow\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mself\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mgetSteps\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mbehaviorName\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mtrackedAgent\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n",
"\u001b[1;32m~\\AppData\\Local\\Temp/ipykernel_19308/2258777724.py\u001b[0m in \u001b[0;36m<module>\u001b[1;34m\u001b[0m\n\u001b[0;32m 7\u001b[0m \u001b[1;32mwhile\u001b[0m \u001b[1;32mnot\u001b[0m \u001b[0mdone\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 8\u001b[0m \u001b[0mactions\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mdemoAct\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mgetHumanActions\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m----> 9\u001b[1;33m \u001b[0mnextState\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0m_\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mdone\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0m_\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0m_\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0menv\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mstep\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mactions\u001b[0m\u001b[1;33m=\u001b[0m\u001b[0mactions\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m 10\u001b[0m \u001b[0mdemoMem\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0msaveMems\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mstate\u001b[0m\u001b[1;33m=\u001b[0m\u001b[0mstate\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mactorProb\u001b[0m\u001b[1;33m=\u001b[0m\u001b[1;32mNone\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0maction\u001b[0m\u001b[1;33m=\u001b[0m\u001b[0mactions\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mreward\u001b[0m\u001b[1;33m=\u001b[0m\u001b[1;32mNone\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mdone\u001b[0m\u001b[1;33m=\u001b[0m\u001b[1;32mNone\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 11\u001b[0m \u001b[0mstate\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mnextState\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n",
"\u001b[1;32mc:\\Users\\UCUNI\\OneDrive\\Unity\\ML-Agents\\Aimbot-PPO\\Aimbot-PPO-Python\\aimBotEnv.py\u001b[0m in \u001b[0;36mstep\u001b[1;34m(self, actions, behaviorName, trackedAgent)\u001b[0m\n\u001b[0;32m 86\u001b[0m \u001b[1;31m# take action to env\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 87\u001b[0m \u001b[0mself\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0menv\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mset_actions\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mbehavior_name\u001b[0m\u001b[1;33m=\u001b[0m\u001b[0mbehaviorName\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0maction\u001b[0m\u001b[1;33m=\u001b[0m\u001b[0mthisActionTuple\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m---> 88\u001b[1;33m \u001b[0mself\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0menv\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mstep\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m 89\u001b[0m \u001b[1;31m# get nextState & reward & done after this action\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 90\u001b[0m \u001b[0mnextState\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mreward\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mdone\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mloadDir\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0msaveNow\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mself\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mgetSteps\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mbehaviorName\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mtrackedAgent\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n",
"\u001b[1;32mc:\\Users\\UCUNI\\AppData\\Local\\Programs\\Python\\Python39\\lib\\site-packages\\mlagents_envs\\timers.py\u001b[0m in \u001b[0;36mwrapped\u001b[1;34m(*args, **kwargs)\u001b[0m\n\u001b[0;32m 303\u001b[0m \u001b[1;32mdef\u001b[0m \u001b[0mwrapped\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;33m*\u001b[0m\u001b[0margs\u001b[0m\u001b[1;33m,\u001b[0m \u001b[1;33m**\u001b[0m\u001b[0mkwargs\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 304\u001b[0m \u001b[1;32mwith\u001b[0m \u001b[0mhierarchical_timer\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mfunc\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0m__qualname__\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m--> 305\u001b[1;33m \u001b[1;32mreturn\u001b[0m \u001b[0mfunc\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;33m*\u001b[0m\u001b[0margs\u001b[0m\u001b[1;33m,\u001b[0m \u001b[1;33m**\u001b[0m\u001b[0mkwargs\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m 306\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 307\u001b[0m \u001b[1;32mreturn\u001b[0m \u001b[0mwrapped\u001b[0m \u001b[1;31m# type: ignore\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n",
"\u001b[1;32mc:\\Users\\UCUNI\\AppData\\Local\\Programs\\Python\\Python39\\lib\\site-packages\\mlagents_envs\\environment.py\u001b[0m in \u001b[0;36mstep\u001b[1;34m(self)\u001b[0m\n\u001b[0;32m 333\u001b[0m \u001b[0moutputs\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mself\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0m_communicator\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mexchange\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mstep_input\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mself\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0m_poll_process\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 334\u001b[0m \u001b[1;32mif\u001b[0m \u001b[0moutputs\u001b[0m \u001b[1;32mis\u001b[0m \u001b[1;32mNone\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m--> 335\u001b[1;33m \u001b[1;32mraise\u001b[0m \u001b[0mUnityCommunicatorStoppedException\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;34m\"Communicator has exited.\"\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m 336\u001b[0m \u001b[0mself\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0m_update_behavior_specs\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0moutputs\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 337\u001b[0m \u001b[0mrl_output\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0moutputs\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mrl_output\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n",
"\u001b[1;31mUnityCommunicatorStoppedException\u001b[0m: Communicator has exited."
@ -50,13 +570,26 @@
}
],
"source": [
"done = False\n",
"env.reset()\n",
"demoAct = HumanActions(mouseDiscount=MOUSEDISCOUNT)\n",
"gailExpertDataDir = \"GAIL-Expert-Data/\"\n",
"state, _, _, _, _ = env.reset()\n",
"\n",
"for ep in range(MAX_EP):\n",
" print(\"EP Start\")\n",
" done = False\n",
" while not done:\n",
" actions = demoAct.getHumanActions()\n",
" env.step(actions=actions)6\n"
" nextState, _, done, _, _ = env.step(actions=actions)\n",
" demoMem.saveMems(state=state, actorProb=None, action=actions, reward=None, done=None)\n",
" state = nextState\n",
" nowMemNum = demoMem.memNum\n",
" saveSteps = 500\n",
" lastMemCheckPoint = 0\n",
" if nowMemNum / saveSteps >= lastMemCheckPoint + 1:\n",
" lastMemCheckPoint +=1\n",
" print(\"nowMemNum\", nowMemNum)\n",
" print(\"lastMemCheckPoint\", lastMemCheckPoint)\n",
" demoMem.saveMemtoFile(gailExpertDataDir)\n",
" print(\"mem_saved\")\n"
]
}
],

170
Aimbot-PPO-Python/GAIL.py Normal file

@ -0,0 +1,170 @@
import tensorflow as tf
import numpy as np
from numpy import ndarray
from PPO import PPO
from tensorflow import keras
from tensorflow.keras import layers
from tensorflow.keras import optimizers
from GAILConfig import GAILConfig
EPS = 1e-8


class GAIL(object):
    def __init__(
        self,
        stateSize: int,
        disActShape: list,
        conActSize: int,
        conActRange: float,
        gailConfig: GAILConfig,
    ):
        self.stateSize = stateSize
        self.disActShape = disActShape
        self.disActSize = len(disActShape)
        self.conActSize = conActSize
        self.conActRange = conActRange
        self.totalActSize = self.disActSize + conActSize
        self.discrimInputSize = stateSize + self.totalActSize
        self.discriminatorNNShape = gailConfig.discrimNNShape
        self.discrimLR = gailConfig.discrimLR
        self.discrimTrainEpochs = gailConfig.discrimTrainEpochs
        self.ppoConfig = gailConfig.ppoConfig

        self.ppo = PPO(stateSize, disActShape, conActSize, conActRange, self.ppoConfig)
        self.discriminator = self.buildDiscriminatorNet(True)

    def buildDiscriminatorNet(self, compileModel: bool):
        # -----------Input Layers-----------
        stateInput = layers.Input(shape=(self.discrimInputSize,), name="stateInput")

        # -------Intermediate layers--------
        interLayers = []
        interLayersIndex = 0
        for neuralUnit in self.discriminatorNNShape:
            thisLayerName = "dense" + str(interLayersIndex)
            if interLayersIndex == 0:
                interLayers.append(
                    layers.Dense(neuralUnit, activation="relu", name=thisLayerName)(stateInput)
                )
            else:
                interLayers.append(
                    layers.Dense(neuralUnit, activation="relu", name=thisLayerName)(interLayers[-1])
                )
            interLayersIndex += 1

        # ----------Output Layers-----------
        output = layers.Dense(1, activation="sigmoid")(interLayers[-1])

        # ----------Model Compile-----------
        model = keras.Model(inputs=stateInput, outputs=output)
        if compileModel:
            criticOPT = optimizers.Adam(learning_rate=self.discrimLR)
            model.compile(optimizer=criticOPT, loss=self.discrimLoss())
        return model

    def discrimLoss(self):
        def loss(y_true, y_pred):
            """discriminator loss function

            Args:
                y_true (tf.constant): demo trajectory
                y_pred (tf.constant): agent trajectory predict value

            Returns:
                tf.Tensor: combined demo and agent discriminator loss
            """
            demoP = self.discriminator(y_true)
            agentLoss = tf.negative(tf.reduce_mean(tf.math.log(1.0 - y_pred + EPS)))
            demoLoss = tf.negative(tf.reduce_mean(tf.math.log(demoP + EPS)))
            loss = agentLoss + demoLoss
            return loss

        return loss

    def inference(self, states: ndarray, actions: ndarray):
        """discriminator predict result

        Args:
            states (ndarray): states
            actions (ndarray): actions

        Returns:
            tf.constant: discrim predict result
        """
        # check dimension
        if states.ndim != 2:
            stateNum = int(len(states) / self.stateSize)
            states = states.reshape([stateNum, self.stateSize])
        if actions.ndim != 2:
            actionsNum = int(len(actions) / self.totalActSize)
            actions = actions.reshape([actionsNum, self.totalActSize])
        thisTrajectory = tf.concat([states, actions], axis=1)
        discrimPredict = self.discriminator(thisTrajectory)
        return discrimPredict

    def discriminatorACC(
        self, demoStates: ndarray, demoActions: ndarray, agentStates: ndarray, agentActions: ndarray
    ):
        demoAcc = np.mean(self.inference(demoStates, demoActions))
        agentAcc = np.mean(self.inference(agentStates, agentActions))
        return demoAcc, agentAcc

    def trainDiscriminator(
        self,
        demoStates: ndarray,
        demoActions: ndarray,
        agentStates: ndarray,
        agentActions: ndarray,
        epochs: int = None,
    ):
        """train Discriminator

        Args:
            demoStates (ndarray): expert states
            demoActions (ndarray): expert actions
            agentStates (ndarray): agentPPO generated states
            agentActions (ndarray): agentPPO generated actions
            epochs (int): epoch times

        Returns:
            tf.constant: all losses array
        """
        if epochs is None:
            epochs = self.discrimTrainEpochs
        demoTrajectory = tf.concat([demoStates, demoActions], axis=1)
        agentTrajectory = tf.concat([agentStates, agentActions], axis=1)
        his = self.discriminator.fit(x=agentTrajectory, y=demoTrajectory, epochs=epochs, verbose=0)
        demoAcc = np.mean(self.inference(demoStates, demoActions))
        agentAcc = np.mean(self.inference(agentStates, agentActions))
        return his.history["loss"], demoAcc, 1 - agentAcc

    def getActions(self, state: ndarray):
        actions, predictResult = self.ppo.chooseAction(state)
        return actions, predictResult

    def trainPPO(
        self,
        states: ndarray,
        oldActorResult: ndarray,
        actions: ndarray,
        newRewards: ndarray,
        dones: ndarray,
        nextState: ndarray,
        epochs: int = None,
    ):
        criticV = self.ppo.getCriticV(states)
        discountedR = self.ppo.discountReward(nextState, criticV, dones, newRewards)
        advantage = self.ppo.getGAE(discountedR, criticV)
        criticLosses = self.ppo.trainCritic(states, discountedR, epochs)
        actorLosses = self.ppo.trainActor(states, oldActorResult, actions, advantage, epochs)
        return actorLosses, criticLosses

    def generateAction(self, states: ndarray):
        act, actorP = self.ppo.chooseAction(states)
        return act, actorP
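A hedged usage sketch of the GAIL class above. GAILConfig defaults are used, PPO (imported by GAIL.py) is assumed to be available from the existing codebase, and the sizes, random batches and the reward shaping at the end are illustrative, not part of this commit:

import numpy as np
from GAIL import GAIL
from GAILConfig import GAILConfig

# illustrative sizes; the real ones come from the Unity observation and action specs
STATE_SIZE = 90            # e.g. SINGLE_STATE_SIZE * stackSize
DIS_ACT_SHAPE = [3, 3, 2]  # ws / ad / click branches, matching the notebook actions
CON_ACT_SIZE = 1           # mouse x movement

gail = GAIL(
    stateSize=STATE_SIZE,
    disActShape=DIS_ACT_SHAPE,
    conActSize=CON_ACT_SIZE,
    conActRange=10.0,
    gailConfig=GAILConfig(),
)

# discriminator update on same-sized expert and agent batches (random stand-ins here)
actDim = len(DIS_ACT_SHAPE) + CON_ACT_SIZE
demoS, demoA = np.random.rand(64, STATE_SIZE), np.random.rand(64, actDim)
agentS, agentA = np.random.rand(64, STATE_SIZE), np.random.rand(64, actDim)
losses, demoAcc, agentAcc = gail.trainDiscriminator(demoS, demoA, agentS, agentA)

# one common GAIL-style reward shaping: reward agent transitions the discriminator
# scores as expert-like, then feed newRewards into trainPPO
d = gail.inference(agentS, agentA).numpy()
newRewards = -np.log(1.0 - d + 1e-8)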

Aimbot-PPO-Python/GAILConfig.py Normal file

@ -0,0 +1,24 @@
import datetime
from typing import NamedTuple
from PPOConfig import PPOConfig
class GAILConfig(NamedTuple):
    discrimNNShape: list = [128, 64]
    discrimLR: float = 1e-3
    discrimTrainEpochs: int = 8
    discrimSaveDir: str = "GAIL-Model/" + datetime.datetime.now().strftime("%m%d-%H%M") + "/"
    ppoConfig: PPOConfig = PPOConfig(
        NNShape=[128, 64],
        actorLR=2e-3,
        criticLR=2e-3,
        gamma=0.99,
        lmbda=0.95,
        clipRange=0.20,
        entropyWeight=1e-2,
        trainEpochs=8,
        saveDir="GAIL-Model/" + datetime.datetime.now().strftime("%m%d-%H%M") + "/",
        loadModelDir=None,
    )
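Because GAILConfig is a NamedTuple, individual fields can be overridden at construction while the rest keep their defaults. A small illustrative sketch (values are arbitrary, and PPOConfig is assumed to expose its fields as attributes as above):

from GAILConfig import GAILConfig

# override only the discriminator settings, keep the default ppoConfig
config = GAILConfig(discrimNNShape=[256, 128], discrimLR=5e-4, discrimTrainEpochs=4)
print(config.discrimLR)        # 0.0005
print(config.ppoConfig.gamma)  # 0.99, from the default PPOConfig above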

Aimbot-PPO-Python/GAILMem.py Normal file

@ -0,0 +1,175 @@
import os
import random
import numpy as np
class GAILMem(object):
    def __init__(self):
        self.states = []
        self.actorProbs = []
        self.actions = []
        self.rewards = []
        self.dones = []
        self.memNum = 0
        print("√√√√√Buffer Initialized Success√√√√√")

    def clearMem(self):
        """clear memories"""
        self.states = []
        self.actorProbs = []
        self.actions = []
        self.rewards = []
        self.dones = []
        self.memNum = 0

    def saveMemtoFile(self, dir: str):
        """save memories ndarray to npz file

        Args:
            dir (str): save directory, like "GAIL-Expert-Data/", ending with "/"
        """
        statesNP = np.asarray(self.states)
        actorProbsNP = np.asarray(self.actorProbs)
        actionsNP = np.asarray(self.actions)
        rewardsNP = np.asarray(self.rewards)
        donesNP = np.asarray(self.dones)
        thisSaveDir = dir + "pack-" + str(self.memNum)
        try:
            np.savez(
                thisSaveDir,
                states=statesNP,
                actorProbs=actorProbsNP,
                actions=actionsNP,
                rewards=rewardsNP,
                dones=donesNP,
            )
        except FileNotFoundError:
            os.mkdir(dir)
            np.savez(
                thisSaveDir,
                states=statesNP,
                actorProbs=actorProbsNP,
                actions=actionsNP,
                rewards=rewardsNP,
                dones=donesNP,
            )

    def loadMemFile(self, dir: str):
        """load memories from npz file

        Args:
            dir (str): file path
        """
        self.clearMem()
        memFile = np.load(dir)
        self.states = memFile["states"].tolist()
        self.actorProbs = memFile["actorProbs"].tolist()
        self.actions = memFile["actions"].tolist()
        self.rewards = memFile["rewards"].tolist()
        self.dones = memFile["dones"].tolist()
        self.memNum = len(self.states)

    def getRandomSample(self, sampleNum: int = 0):
        """get random unique sample set.

        Args:
            sampleNum (int, optional): sample number; when 0, return all samples. Defaults to 0.

        Returns:
            tuple: (states, actorProbs, actions, rewards, dones)
        """
        if sampleNum == 0:
            return (
                self.getStates(),
                self.getActorProbs(),
                self.getActions(),
                self.getRewards(),
                self.getDones(),
            )
        else:
            randIndex = random.sample(range(0, self.memNum), sampleNum)
            return (
                self.standDims(np.asarray(self.states)[randIndex]),
                self.standDims(np.asarray(self.actorProbs)[randIndex]),
                self.standDims(np.asarray(self.actions)[randIndex]),
                self.standDims(np.asarray(self.rewards)[randIndex]),
                self.standDims(np.asarray(self.dones)[randIndex]),
            )

    def getStates(self):
        """get all States data as ndarray

        Returns:
            ndarray: ndarray type State data
        """
        return self.standDims(np.asarray(self.states))

    def getActorProbs(self):
        """get all ActorProbs data as ndarray

        Returns:
            ndarray: ndarray type ActorProbs data
        """
        return self.standDims(np.asarray(self.actorProbs))

    def getActions(self):
        """get all Actions data as ndarray

        Returns:
            ndarray: ndarray type Actions data
        """
        return self.standDims(np.asarray(self.actions))

    def getRewards(self):
        """get all Rewards data as ndarray

        Returns:
            ndarray: ndarray type Rewards data
        """
        return self.standDims(np.asarray(self.rewards))

    def getDones(self):
        """get all Dones data as ndarray

        Returns:
            ndarray: ndarray type Dones data
        """
        return self.standDims(np.asarray(self.dones))

    def standDims(self, data):
        """standardize data's dimension

        Args:
            data (list): data list

        Returns:
            ndarray: ndarray type data
        """
        # standardize data's dimension
        if np.ndim(data) > 2:
            return np.squeeze(data, axis=1)
        elif np.ndim(data) < 2:
            return np.expand_dims(data, axis=1)
        else:
            return np.asarray(data)

    def saveMems(self, state, actorProb, action, reward, done):
        """save memories

        Args:
            state (_type_): states
            actorProb (_type_): actor predict result
            action (_type_): actor choosed action
            reward (_type_): reward
            done (_type_): done flag
        """
        self.states.append(state)
        self.actorProbs.append(actorProb)
        self.actions.append(action)
        self.rewards.append(reward)
        self.dones.append(done)
        self.memNum += 1
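A small sketch of reading a saved expert pack back and drawing a batch from it. The pack name is illustrative; saveMemtoFile above names files "pack-<memNum>.npz" inside the given directory:

from GAILMem import GAILMem

expertMem = GAILMem()
expertMem.loadMemFile("GAIL-Expert-Data/pack-24957.npz")  # illustrative pack name

# 64 unique expert transitions, already standardized to 2-D arrays by standDims
states, actorProbs, actions, rewards, dones = expertMem.getRandomSample(64)
print(states.shape, actions.shape)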

Aimbot-PPO-Python/HumanAction.py

@ -1,5 +1,6 @@
import keyboard
import mouse
import math
class HumanActions:
@ -14,11 +15,13 @@ class HumanActions:
        self.screenW = screenW
        self.screenH = screenH
        self.MOUSEDISCOUNT = mouseDiscount
        self.mouseSmooth = 5
        self.mouseMax = 10

    def getHumanActions(self):
        x, _ = mouse.get_position()
        xMovement = (x - self.screenW / 2) / self.MOUSEDISCOUNT
        xMovement = self.smoothMouseMovement(xMovement)
        ws = 0
        ad = 0
        click = 0
@ -42,10 +45,14 @@ class HumanActions:
        elif keyboard.is_pressed("s+a"):
            ws = 2
            ad = 2
        if mouse.is_pressed(button="left"):
        if keyboard.is_pressed("0"):
            click = 1
        actions = [ws, ad, click, [xMovement]]
        mouse.move(self.screenW / 2, self.screenH / 2)
        return actions

    def smoothMouseMovement(self, x: float):
        out = (1 / (1 + math.exp(-x / self.mouseSmooth)) - 1 / 2) * self.mouseMax * 2
        return out
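The sigmoid above squashes the raw pixel offset into roughly (-mouseMax, mouseMax), so large mouse jumps saturate instead of producing huge continuous actions. A standalone check of the same formula with the defaults set in __init__ (mouseSmooth=5, mouseMax=10):

import math

def smooth(x, mouseSmooth=5, mouseMax=10):
    # same shape as HumanActions.smoothMouseMovement
    return (1 / (1 + math.exp(-x / mouseSmooth)) - 1 / 2) * mouseMax * 2

for raw in (0, 5, 20, 100):
    print(raw, round(smooth(raw), 2))
# 0 -> 0.0, 5 -> 4.62, 20 -> 9.64, 100 -> 10.0 (saturates near mouseMax)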

Aimbot-PPO-Python/aimBotEnv.py

@ -6,7 +6,14 @@ from numpy import ndarray
class makeEnv(object):
    def __init__(self, envPath, workerID, basePort):
    def __init__(
        self,
        envPath: str,
        workerID: int = 1,
        basePort: int = 100,
        stackSize: int = 1,
        stackIntercal: int = 0,
    ):
        self.env = UnityEnvironment(
            file_name=envPath,
            seed=1,
@ -25,24 +32,31 @@ class makeEnv(object):
        self.OBSERVATION_SPECS = self.SPEC.observation_specs[0]  # observation spec
        self.ACTION_SPEC = self.SPEC.action_spec  # action specs

        self.DISCRETE_SIZE = self.ACTION_SPEC.discrete_size  # size of continuous actions
        self.DISCRETE_SIZE = self.ACTION_SPEC.discrete_size
        self.DISCRETE_SHAPE = list(self.ACTION_SPEC.discrete_branches)
        self.CONTINUOUS_SIZE = self.ACTION_SPEC.continuous_size  # size of discrete actions
        self.STATE_SIZE = self.OBSERVATION_SPECS.shape[0] - self.LOAD_DIR_SIZE_IN_STATE  # number of environment observation values
        self.CONTINUOUS_SIZE = self.ACTION_SPEC.continuous_size
        self.SINGLE_STATE_SIZE = self.OBSERVATION_SPECS.shape[0] - self.LOAD_DIR_SIZE_IN_STATE
        self.STATE_SIZE = self.SINGLE_STATE_SIZE * stackSize
        # stacked State
        self.STACK_SIZE = stackSize
        self.STATE_BUFFER_SIZE = stackSize + ((stackSize - 1) * stackIntercal)
        self.STACK_INDEX = list(range(0, self.STATE_BUFFER_SIZE, stackIntercal + 1))
        self.statesBuffer = np.array([[0.0] * self.SINGLE_STATE_SIZE] * self.STATE_BUFFER_SIZE)

        print("√√√√√Enviroment Initialized Success√√√√√")
    def step(
        self,
        actions: list,
        behaviorName: ndarray = None,
        trackedAgent: ndarray = None,
        trackedAgent: int = None,
    ):
        """change actions list to ActionTuple, then send it to the environment

        Args:
            actions (list): PPO chooseAction output action list
            behaviorName (ndarray, optional): behaviorName. Defaults to None.
            trackedAgent (ndarray, optional): trackedAgentID. Defaults to None.
            trackedAgent (int, optional): trackedAgentID. Defaults to None.

        Returns:
            ndarray: nextState, reward, done, loadDir, saveNow
@ -54,13 +68,13 @@ class makeEnv(object):
            discreteActions = np.asarray([[0]])
        else:
            # create discrete action from actions list
            discreteActions = np.asanyarray([actions[0:self.DISCRETE_SIZE]])
            discreteActions = np.asanyarray([actions[0 : self.DISCRETE_SIZE]])
        if self.CONTINUOUS_SIZE == 0:
            # create empty continuous action
            continuousActions = np.asanyarray([[0.0]])
        else:
            # create continuous actions from actions list
            continuousActions = np.asanyarray(actions[self.DISCRETE_SIZE:])
            continuousActions = np.asanyarray(actions[self.DISCRETE_SIZE :])

        if behaviorName is None:
            behaviorName = self.BEHA_NAME
@ -98,21 +112,28 @@ class makeEnv(object):
        if trackedAgent in decisionSteps:  # if the game has not ended, the environment state is stored in decision_steps
            nextState = decisionSteps[trackedAgent].obs[0]
            nextState = np.reshape(nextState, [1, self.STATE_SIZE + self.LOAD_DIR_SIZE_IN_STATE])
            saveNow = nextState[0][-1]
            loadDir = nextState[0][-3:-1]
            nextState = nextState[0][:-3]
            nextState = np.reshape(
                nextState, [self.SINGLE_STATE_SIZE + self.LOAD_DIR_SIZE_IN_STATE]
            )
            saveNow = nextState[-1]
            loadDir = nextState[-3:-1]
            nextState = nextState[:-3]
            reward = decisionSteps[trackedAgent].reward
            done = False
        if trackedAgent in terminalSteps:  # if the game has ended, the environment state is stored in terminal_steps
            nextState = terminalSteps[trackedAgent].obs[0]
            nextState = np.reshape(nextState, [1, self.STATE_SIZE + self.LOAD_DIR_SIZE_IN_STATE])
            saveNow = nextState[0][-1]
            loadDir = nextState[0][-3:-1]
            nextState = nextState[0][:-3]
            nextState = np.reshape(
                nextState, [self.SINGLE_STATE_SIZE + self.LOAD_DIR_SIZE_IN_STATE]
            )
            saveNow = nextState[-1]
            loadDir = nextState[-3:-1]
            nextState = nextState[:-3]
            reward = terminalSteps[trackedAgent].reward
            done = True
        return nextState, reward, done, loadDir, saveNow
        # stack state
        stackedStates = self.stackStates(nextState)
        return stackedStates, reward, done, loadDir, saveNow
    def reset(self):
        """reset environment and get observations
@ -120,11 +141,21 @@ class makeEnv(object):
        Returns:
            ndarray: nextState, reward, done, loadDir, saveNow
        """
        # reset buffer
        self.statesBuffer = np.array([[0.0] * self.SINGLE_STATE_SIZE] * self.STATE_BUFFER_SIZE)
        # reset env
        self.env.reset()
        nextState, reward, done, loadDir, saveNow = self.getSteps()
        return nextState, reward, done, loadDir, saveNow

    def stackStates(self, state):
        # save buffer
        self.statesBuffer[0:-1] = self.statesBuffer[1:]
        self.statesBuffer[-1] = state
        # return stacked states
        return self.statesBuffer[self.STACK_INDEX]

    def render(self):
        """render environment
        """
        """render environment"""
        self.env.render()
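With the notebook settings above (stackSize=3, stackIntercal=29) the buffer keeps 3 + 2 * 29 = 61 past observations and the stack picks every 30th slot, so the agent sees the newest frame plus two frames 30 and 60 steps back. A standalone sketch of the same bookkeeping, with an illustrative observation size instead of the real Unity spec:

import numpy as np

SINGLE_STATE_SIZE = 5        # illustrative; the real value comes from the observation spec
stackSize, stackIntercal = 3, 29

STATE_BUFFER_SIZE = stackSize + (stackSize - 1) * stackIntercal     # 61
STACK_INDEX = list(range(0, STATE_BUFFER_SIZE, stackIntercal + 1))  # [0, 30, 60]
statesBuffer = np.zeros((STATE_BUFFER_SIZE, SINGLE_STATE_SIZE))

def stackStates(state):
    # shift everything one slot back, append the newest observation at the end,
    # then return the spaced-out rows (oldest first, newest last)
    statesBuffer[0:-1] = statesBuffer[1:]
    statesBuffer[-1] = state
    return statesBuffer[STACK_INDEX]

print(STATE_BUFFER_SIZE, STACK_INDEX)                   # 61 [0, 30, 60]
print(stackStates(np.ones(SINGLE_STATE_SIZE)).shape)    # (3, 5)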

View File

@ -361,17 +361,102 @@
},
{
"cell_type": "code",
"execution_count": null,
"execution_count": 21,
"metadata": {},
"outputs": [],
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"3\n",
"deque([[0.0, 0.0, 0.0, 0.0, 0.0], [0.0, 0.0, 0.0, 0.0, 0.0], [1, 1, 1, 1, 1]], maxlen=3)\n",
"3\n",
"deque([[0.0, 0.0, 0.0, 0.0, 0.0], [1, 1, 1, 1, 1], [2, 2, 2, 2, 2]], maxlen=3)\n",
"3\n",
"deque([[1, 1, 1, 1, 1], [2, 2, 2, 2, 2], [3, 3, 3, 3, 3]], maxlen=3)\n",
"3\n",
"deque([[2, 2, 2, 2, 2], [3, 3, 3, 3, 3], [4, 4, 4, 4, 4]], maxlen=3)\n",
"3\n",
"deque([[3, 3, 3, 3, 3], [4, 4, 4, 4, 4], [5, 5, 5, 5, 5]], maxlen=3)\n",
"3\n",
"deque([[4, 4, 4, 4, 4], [5, 5, 5, 5, 5], [6, 6, 6, 6, 6]], maxlen=3)\n",
"3\n",
"deque([[5, 5, 5, 5, 5], [6, 6, 6, 6, 6], [7, 7, 7, 7, 7]], maxlen=3)\n",
"3\n",
"deque([[6, 6, 6, 6, 6], [7, 7, 7, 7, 7], [8, 8, 8, 8, 8]], maxlen=3)\n",
"3\n",
"deque([[7, 7, 7, 7, 7], [8, 8, 8, 8, 8], [9, 9, 9, 9, 9]], maxlen=3)\n"
]
}
],
"source": [
"import keyboard\n",
"from collections import deque\n",
"import numpy as np\n",
"\n",
"while True:\n",
" if keyboard.is_pressed(\"w\"):\n",
" print(\"w\")\n",
" elif keyboard.is_pressed(\"s\"):\n",
" print(\"s\")"
"maxBuffer = 3\n",
"stateSize = 5\n",
"\n",
"aa = deque([[0.0]*stateSize],maxlen=maxBuffer)\n",
"\n",
"def ss(s):\n",
" aa.append(s)\n",
" if len(aa) < maxBuffer:\n",
" for i in range(maxBuffer - len(aa)):\n",
" aa.appendleft([0.0] * stateSize)\n",
"\n",
"for i in range(1,10):\n",
" ss([i,i,i,i,i])\n",
" print(len(aa))\n",
" print(aa)\n",
"'''\n",
"3\n",
"deque([[0.0, 0.0, 0.0, 0.0, 0.0], [0.0, 0.0, 0.0, 0.0, 0.0], [1, 1, 1, 1, 1]], maxlen=3)\n",
"3\n",
"deque([[0.0, 0.0, 0.0, 0.0, 0.0], [1, 1, 1, 1, 1], [2, 2, 2, 2, 2]], maxlen=3)\n",
"3\n",
"deque([[1, 1, 1, 1, 1], [2, 2, 2, 2, 2], [3, 3, 3, 3, 3]], maxlen=3)\n",
"3\n",
"deque([[2, 2, 2, 2, 2], [3, 3, 3, 3, 3], [4, 4, 4, 4, 4]], maxlen=3)\n",
"3\n",
"deque([[3, 3, 3, 3, 3], [4, 4, 4, 4, 4], [5, 5, 5, 5, 5]], maxlen=3)\n",
"3\n",
"deque([[4, 4, 4, 4, 4], [5, 5, 5, 5, 5], [6, 6, 6, 6, 6]], maxlen=3)\n",
"3\n",
"deque([[5, 5, 5, 5, 5], [6, 6, 6, 6, 6], [7, 7, 7, 7, 7]], maxlen=3)\n",
"3\n",
"deque([[6, 6, 6, 6, 6], [7, 7, 7, 7, 7], [8, 8, 8, 8, 8]], maxlen=3)\n",
"3\n",
"deque([[7, 7, 7, 7, 7], [8, 8, 8, 8, 8], [9, 9, 9, 9, 9]], maxlen=3)'''"
]
},
{
"cell_type": "code",
"execution_count": 35,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"1\n",
"[0]\n"
]
}
],
"source": [
"from collections import deque\n",
"import numpy as np\n",
"\n",
"ss = 1\n",
"si = 0\n",
"buffersize = ss + ((ss-1)*si)\n",
"print(buffersize)\n",
"stackedStates = deque([[0.0] * 10]*6, maxlen=3)\n",
"stackedStates.append([1.0]*10)\n",
"ssnp = stackedStates\n",
"\n",
"aa = list(range(0,buffersize,si+1))\n",
"print(aa)"
]
}
],