GAIL class, human action recording, stack-state feature added
Add the GAIL, GAILMem, and GAILConfig classes. Add HumanAction recording to save expert data. Add a stackState feature that stacks multiple past states so the agent knows what happened before.
This commit is contained in:
parent
ae8a1ba8e2
commit
2a498f18f6
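
Taken together, the new pieces are wired up roughly like this (a minimal sketch assembled from the files added below; the sizes and the pack filename are illustrative assumptions, and PPO/PPOConfig come from the project's existing files):

from GAIL import GAIL
from GAILConfig import GAILConfig
from GAILMem import GAILMem

STATE_SIZE = 90 * 3        # assumed: single-state size x stacked states
DIS_ACT_SHAPE = [3, 3, 2]  # assumed discrete action branches
CON_ACT_SIZE = 1           # continuous action: mouse x movement
CON_ACT_RANGE = 10.0       # assumed continuous action range

gail = GAIL(STATE_SIZE, DIS_ACT_SHAPE, CON_ACT_SIZE, CON_ACT_RANGE, GAILConfig())

expertMem = GAILMem()
expertMem.loadMemFile("GAIL-Expert-Data/pack-500.npz")  # assumed pack file recorded via HumanActions + GAILMem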
4
.gitignore
vendored
@@ -74,8 +74,10 @@ crashlytics-build.properties

# Python Folder
/Aimbot-PPO-Python/.vscode/
/Aimbot-PPO-Python/.mypy_cache/
/Aimbot-PPO-Python/__pycache__/
/Aimbot-PPO-Python/Backup/
/Aimbot-PPO-Python/Build-MultiScene-WithLoad/
/Aimbot-PPO-Python/Build-CloseEnemyCut/
/Aimbot-PPO-Python/PPO-Model/
/Aimbot-PPO-Python/PPO-Model/
/Aimbot-PPO-Python/GAIL-Expert-Data/
@@ -9,13 +9,15 @@
"name": "stdout",
"output_type": "stream",
"text": [
"√√√√√Enviroment Initialized Success√√√√√\n"
"√√√√√Enviroment Initialized Success√√√√√\n",
"√√√√√Buffer Initialized Success√√√√√\n"
]
}
],
"source": [
"import time\n",
"import aimBotEnv\n",
"from GAILMem import GAILMem\n",
"from HumanAction import HumanActions\n",
"\n",
"# Env\n",
@@ -23,10 +25,21 @@
"WORKER_ID = 1\n",
"BASE_PORT = 200\n",
"\n",
"MOUSEDISCOUNT = 8.0\n",
"# ENV Para\n",
"MOUSEDISCOUNT = 20.0\n",
"MAX_EP = 10000000\n",
"STACKSTATESIZE = 3\n",
"STACKINTERCE = 29\n",
"\n",
"env = aimBotEnv.makeEnv(envPath=ENV_PATH, workerID=WORKER_ID, basePort=BASE_PORT)\n"
"env = aimBotEnv.makeEnv(\n",
"    envPath=ENV_PATH,\n",
"    workerID=WORKER_ID,\n",
"    basePort=BASE_PORT,\n",
"    stackSize=STACKSTATESIZE,\n",
"    stackIntercal=STACKINTERCE,\n",
")\n",
"demoMem = GAILMem()\n",
"demoAct = HumanActions(mouseDiscount=MOUSEDISCOUNT)\n"
]
},
{
@@ -34,6 +47,513 @@
"execution_count": 2,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"EP Start\n",
|
||||
"EP Start\n",
|
||||
"EP Start\n",
|
||||
"nowMemNum 743\n",
|
||||
"lastMemCheckPoint 1\n",
|
||||
"mem_saved\n",
|
||||
"EP Start\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"name": "stderr",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"c:\\Users\\UCUNI\\OneDrive\\Unity\\ML-Agents\\Aimbot-PPO\\Aimbot-PPO-Python\\GAILMem.py:33: VisibleDeprecationWarning: Creating an ndarray from ragged nested sequences (which is a list-or-tuple of lists-or-tuples-or ndarrays with different lengths or shapes) is deprecated. If you meant to do this, you must specify 'dtype=object' when creating the ndarray.\n",
|
||||
" actionsNP = np.asarray(self.actions)\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"nowMemNum 993\n",
|
||||
"lastMemCheckPoint 1\n",
|
||||
"mem_saved\n",
|
||||
"EP Start\n",
|
||||
"nowMemNum 1199\n",
|
||||
"lastMemCheckPoint 1\n",
|
||||
"mem_saved\n",
|
||||
"EP Start\n",
|
||||
"nowMemNum 1426\n",
|
||||
"lastMemCheckPoint 1\n",
|
||||
"mem_saved\n",
|
||||
"EP Start\n",
|
||||
"nowMemNum 1671\n",
|
||||
"lastMemCheckPoint 1\n",
|
||||
"mem_saved\n",
|
||||
"EP Start\n",
|
||||
"nowMemNum 1890\n",
|
||||
"lastMemCheckPoint 1\n",
|
||||
"mem_saved\n",
|
||||
"EP Start\n",
|
||||
"nowMemNum 2097\n",
|
||||
"lastMemCheckPoint 1\n",
|
||||
"mem_saved\n",
|
||||
"EP Start\n",
|
||||
"nowMemNum 2307\n",
|
||||
"lastMemCheckPoint 1\n",
|
||||
"mem_saved\n",
|
||||
"EP Start\n",
|
||||
"nowMemNum 2510\n",
|
||||
"lastMemCheckPoint 1\n",
|
||||
"mem_saved\n",
|
||||
"EP Start\n",
|
||||
"nowMemNum 2710\n",
|
||||
"lastMemCheckPoint 1\n",
|
||||
"mem_saved\n",
|
||||
"EP Start\n",
|
||||
"nowMemNum 2889\n",
|
||||
"lastMemCheckPoint 1\n",
|
||||
"mem_saved\n",
|
||||
"EP Start\n",
|
||||
"nowMemNum 3079\n",
|
||||
"lastMemCheckPoint 1\n",
|
||||
"mem_saved\n",
|
||||
"EP Start\n",
|
||||
"nowMemNum 3263\n",
|
||||
"lastMemCheckPoint 1\n",
|
||||
"mem_saved\n",
|
||||
"EP Start\n",
|
||||
"nowMemNum 3506\n",
|
||||
"lastMemCheckPoint 1\n",
|
||||
"mem_saved\n",
|
||||
"EP Start\n",
|
||||
"nowMemNum 3764\n",
|
||||
"lastMemCheckPoint 1\n",
|
||||
"mem_saved\n",
|
||||
"EP Start\n",
|
||||
"nowMemNum 3982\n",
|
||||
"lastMemCheckPoint 1\n",
|
||||
"mem_saved\n",
|
||||
"EP Start\n",
|
||||
"nowMemNum 4155\n",
|
||||
"lastMemCheckPoint 1\n",
|
||||
"mem_saved\n",
|
||||
"EP Start\n",
|
||||
"nowMemNum 4338\n",
|
||||
"lastMemCheckPoint 1\n",
|
||||
"mem_saved\n",
|
||||
"EP Start\n",
|
||||
"nowMemNum 4530\n",
|
||||
"lastMemCheckPoint 1\n",
|
||||
"mem_saved\n",
|
||||
"EP Start\n",
|
||||
"nowMemNum 4749\n",
|
||||
"lastMemCheckPoint 1\n",
|
||||
"mem_saved\n",
|
||||
"EP Start\n",
|
||||
"nowMemNum 4979\n",
|
||||
"lastMemCheckPoint 1\n",
|
||||
"mem_saved\n",
|
||||
"EP Start\n",
|
||||
"nowMemNum 5159\n",
|
||||
"lastMemCheckPoint 1\n",
|
||||
"mem_saved\n",
|
||||
"EP Start\n",
|
||||
"nowMemNum 5358\n",
|
||||
"lastMemCheckPoint 1\n",
|
||||
"mem_saved\n",
|
||||
"EP Start\n",
|
||||
"nowMemNum 5641\n",
|
||||
"lastMemCheckPoint 1\n",
|
||||
"mem_saved\n",
|
||||
"EP Start\n",
|
||||
"nowMemNum 5887\n",
|
||||
"lastMemCheckPoint 1\n",
|
||||
"mem_saved\n",
|
||||
"EP Start\n",
|
||||
"nowMemNum 6085\n",
|
||||
"lastMemCheckPoint 1\n",
|
||||
"mem_saved\n",
|
||||
"EP Start\n",
|
||||
"nowMemNum 6312\n",
|
||||
"lastMemCheckPoint 1\n",
|
||||
"mem_saved\n",
|
||||
"EP Start\n",
|
||||
"nowMemNum 6471\n",
|
||||
"lastMemCheckPoint 1\n",
|
||||
"mem_saved\n",
|
||||
"EP Start\n",
|
||||
"nowMemNum 6691\n",
|
||||
"lastMemCheckPoint 1\n",
|
||||
"mem_saved\n",
|
||||
"EP Start\n",
|
||||
"nowMemNum 6885\n",
|
||||
"lastMemCheckPoint 1\n",
|
||||
"mem_saved\n",
|
||||
"EP Start\n",
|
||||
"nowMemNum 7086\n",
|
||||
"lastMemCheckPoint 1\n",
|
||||
"mem_saved\n",
|
||||
"EP Start\n",
|
||||
"nowMemNum 7248\n",
|
||||
"lastMemCheckPoint 1\n",
|
||||
"mem_saved\n",
|
||||
"EP Start\n",
|
||||
"nowMemNum 7437\n",
|
||||
"lastMemCheckPoint 1\n",
|
||||
"mem_saved\n",
|
||||
"EP Start\n",
|
||||
"nowMemNum 7608\n",
|
||||
"lastMemCheckPoint 1\n",
|
||||
"mem_saved\n",
|
||||
"EP Start\n",
|
||||
"nowMemNum 7788\n",
|
||||
"lastMemCheckPoint 1\n",
|
||||
"mem_saved\n",
|
||||
"EP Start\n",
|
||||
"nowMemNum 8020\n",
|
||||
"lastMemCheckPoint 1\n",
|
||||
"mem_saved\n",
|
||||
"EP Start\n",
|
||||
"nowMemNum 8193\n",
|
||||
"lastMemCheckPoint 1\n",
|
||||
"mem_saved\n",
|
||||
"EP Start\n",
|
||||
"nowMemNum 8447\n",
|
||||
"lastMemCheckPoint 1\n",
|
||||
"mem_saved\n",
|
||||
"EP Start\n",
|
||||
"nowMemNum 8675\n",
|
||||
"lastMemCheckPoint 1\n",
|
||||
"mem_saved\n",
|
||||
"EP Start\n",
|
||||
"nowMemNum 8869\n",
|
||||
"lastMemCheckPoint 1\n",
|
||||
"mem_saved\n",
|
||||
"EP Start\n",
|
||||
"nowMemNum 9046\n",
|
||||
"lastMemCheckPoint 1\n",
|
||||
"mem_saved\n",
|
||||
"EP Start\n",
|
||||
"nowMemNum 9260\n",
|
||||
"lastMemCheckPoint 1\n",
|
||||
"mem_saved\n",
|
||||
"EP Start\n",
|
||||
"nowMemNum 9469\n",
|
||||
"lastMemCheckPoint 1\n",
|
||||
"mem_saved\n",
|
||||
"EP Start\n",
|
||||
"nowMemNum 9633\n",
|
||||
"lastMemCheckPoint 1\n",
|
||||
"mem_saved\n",
|
||||
"EP Start\n",
|
||||
"nowMemNum 9802\n",
|
||||
"lastMemCheckPoint 1\n",
|
||||
"mem_saved\n",
|
||||
"EP Start\n",
|
||||
"nowMemNum 10019\n",
|
||||
"lastMemCheckPoint 1\n",
|
||||
"mem_saved\n",
|
||||
"EP Start\n",
|
||||
"nowMemNum 10205\n",
|
||||
"lastMemCheckPoint 1\n",
|
||||
"mem_saved\n",
|
||||
"EP Start\n",
|
||||
"nowMemNum 10387\n",
|
||||
"lastMemCheckPoint 1\n",
|
||||
"mem_saved\n",
|
||||
"EP Start\n",
|
||||
"nowMemNum 10657\n",
|
||||
"lastMemCheckPoint 1\n",
|
||||
"mem_saved\n",
|
||||
"EP Start\n",
|
||||
"nowMemNum 10834\n",
|
||||
"lastMemCheckPoint 1\n",
|
||||
"mem_saved\n",
|
||||
"EP Start\n",
|
||||
"nowMemNum 11071\n",
|
||||
"lastMemCheckPoint 1\n",
|
||||
"mem_saved\n",
|
||||
"EP Start\n",
|
||||
"nowMemNum 11284\n",
|
||||
"lastMemCheckPoint 1\n",
|
||||
"mem_saved\n",
|
||||
"EP Start\n",
|
||||
"nowMemNum 11516\n",
|
||||
"lastMemCheckPoint 1\n",
|
||||
"mem_saved\n",
|
||||
"EP Start\n",
|
||||
"nowMemNum 11735\n",
|
||||
"lastMemCheckPoint 1\n",
|
||||
"mem_saved\n",
|
||||
"EP Start\n",
|
||||
"nowMemNum 11948\n",
|
||||
"lastMemCheckPoint 1\n",
|
||||
"mem_saved\n",
|
||||
"EP Start\n",
|
||||
"nowMemNum 12157\n",
|
||||
"lastMemCheckPoint 1\n",
|
||||
"mem_saved\n",
|
||||
"EP Start\n",
|
||||
"nowMemNum 12330\n",
|
||||
"lastMemCheckPoint 1\n",
|
||||
"mem_saved\n",
|
||||
"EP Start\n",
|
||||
"nowMemNum 12565\n",
|
||||
"lastMemCheckPoint 1\n",
|
||||
"mem_saved\n",
|
||||
"EP Start\n",
|
||||
"nowMemNum 12768\n",
|
||||
"lastMemCheckPoint 1\n",
|
||||
"mem_saved\n",
|
||||
"EP Start\n",
|
||||
"nowMemNum 12944\n",
|
||||
"lastMemCheckPoint 1\n",
|
||||
"mem_saved\n",
|
||||
"EP Start\n",
|
||||
"nowMemNum 13129\n",
|
||||
"lastMemCheckPoint 1\n",
|
||||
"mem_saved\n",
|
||||
"EP Start\n",
|
||||
"nowMemNum 13292\n",
|
||||
"lastMemCheckPoint 1\n",
|
||||
"mem_saved\n",
|
||||
"EP Start\n",
|
||||
"nowMemNum 13590\n",
|
||||
"lastMemCheckPoint 1\n",
|
||||
"mem_saved\n",
|
||||
"EP Start\n",
|
||||
"nowMemNum 13765\n",
|
||||
"lastMemCheckPoint 1\n",
|
||||
"mem_saved\n",
|
||||
"EP Start\n",
|
||||
"nowMemNum 13921\n",
|
||||
"lastMemCheckPoint 1\n",
|
||||
"mem_saved\n",
|
||||
"EP Start\n",
|
||||
"nowMemNum 14083\n",
|
||||
"lastMemCheckPoint 1\n",
|
||||
"mem_saved\n",
|
||||
"EP Start\n",
|
||||
"nowMemNum 14254\n",
|
||||
"lastMemCheckPoint 1\n",
|
||||
"mem_saved\n",
|
||||
"EP Start\n",
|
||||
"nowMemNum 14445\n",
|
||||
"lastMemCheckPoint 1\n",
|
||||
"mem_saved\n",
|
||||
"EP Start\n",
|
||||
"nowMemNum 14662\n",
|
||||
"lastMemCheckPoint 1\n",
|
||||
"mem_saved\n",
|
||||
"EP Start\n",
|
||||
"nowMemNum 14833\n",
|
||||
"lastMemCheckPoint 1\n",
|
||||
"mem_saved\n",
|
||||
"EP Start\n",
|
||||
"nowMemNum 15056\n",
|
||||
"lastMemCheckPoint 1\n",
|
||||
"mem_saved\n",
|
||||
"EP Start\n",
|
||||
"nowMemNum 15258\n",
|
||||
"lastMemCheckPoint 1\n",
|
||||
"mem_saved\n",
|
||||
"EP Start\n",
|
||||
"nowMemNum 15425\n",
|
||||
"lastMemCheckPoint 1\n",
|
||||
"mem_saved\n",
|
||||
"EP Start\n",
|
||||
"nowMemNum 15590\n",
|
||||
"lastMemCheckPoint 1\n",
|
||||
"mem_saved\n",
|
||||
"EP Start\n",
|
||||
"nowMemNum 15829\n",
|
||||
"lastMemCheckPoint 1\n",
|
||||
"mem_saved\n",
|
||||
"EP Start\n",
|
||||
"nowMemNum 16057\n",
|
||||
"lastMemCheckPoint 1\n",
|
||||
"mem_saved\n",
|
||||
"EP Start\n",
|
||||
"nowMemNum 16237\n",
|
||||
"lastMemCheckPoint 1\n",
|
||||
"mem_saved\n",
|
||||
"EP Start\n",
|
||||
"nowMemNum 16411\n",
|
||||
"lastMemCheckPoint 1\n",
|
||||
"mem_saved\n",
|
||||
"EP Start\n",
|
||||
"nowMemNum 16612\n",
|
||||
"lastMemCheckPoint 1\n",
|
||||
"mem_saved\n",
|
||||
"EP Start\n",
|
||||
"nowMemNum 16812\n",
|
||||
"lastMemCheckPoint 1\n",
|
||||
"mem_saved\n",
|
||||
"EP Start\n",
|
||||
"nowMemNum 17001\n",
|
||||
"lastMemCheckPoint 1\n",
|
||||
"mem_saved\n",
|
||||
"EP Start\n",
|
||||
"nowMemNum 17173\n",
|
||||
"lastMemCheckPoint 1\n",
|
||||
"mem_saved\n",
|
||||
"EP Start\n",
|
||||
"nowMemNum 17342\n",
|
||||
"lastMemCheckPoint 1\n",
|
||||
"mem_saved\n",
|
||||
"EP Start\n",
|
||||
"nowMemNum 17515\n",
|
||||
"lastMemCheckPoint 1\n",
|
||||
"mem_saved\n",
|
||||
"EP Start\n",
|
||||
"nowMemNum 17715\n",
|
||||
"lastMemCheckPoint 1\n",
|
||||
"mem_saved\n",
|
||||
"EP Start\n",
|
||||
"nowMemNum 17890\n",
|
||||
"lastMemCheckPoint 1\n",
|
||||
"mem_saved\n",
|
||||
"EP Start\n",
|
||||
"nowMemNum 18072\n",
|
||||
"lastMemCheckPoint 1\n",
|
||||
"mem_saved\n",
|
||||
"EP Start\n",
|
||||
"nowMemNum 18261\n",
|
||||
"lastMemCheckPoint 1\n",
|
||||
"mem_saved\n",
|
||||
"EP Start\n",
|
||||
"nowMemNum 18489\n",
|
||||
"lastMemCheckPoint 1\n",
|
||||
"mem_saved\n",
|
||||
"EP Start\n",
|
||||
"nowMemNum 18701\n",
|
||||
"lastMemCheckPoint 1\n",
|
||||
"mem_saved\n",
|
||||
"EP Start\n",
|
||||
"nowMemNum 18886\n",
|
||||
"lastMemCheckPoint 1\n",
|
||||
"mem_saved\n",
|
||||
"EP Start\n",
|
||||
"nowMemNum 19100\n",
|
||||
"lastMemCheckPoint 1\n",
|
||||
"mem_saved\n",
|
||||
"EP Start\n",
|
||||
"nowMemNum 19318\n",
|
||||
"lastMemCheckPoint 1\n",
|
||||
"mem_saved\n",
|
||||
"EP Start\n",
|
||||
"nowMemNum 19487\n",
|
||||
"lastMemCheckPoint 1\n",
|
||||
"mem_saved\n",
|
||||
"EP Start\n",
|
||||
"nowMemNum 19670\n",
|
||||
"lastMemCheckPoint 1\n",
|
||||
"mem_saved\n",
|
||||
"EP Start\n",
|
||||
"nowMemNum 19881\n",
|
||||
"lastMemCheckPoint 1\n",
|
||||
"mem_saved\n",
|
||||
"EP Start\n",
|
||||
"nowMemNum 20041\n",
|
||||
"lastMemCheckPoint 1\n",
|
||||
"mem_saved\n",
|
||||
"EP Start\n",
|
||||
"nowMemNum 20279\n",
|
||||
"lastMemCheckPoint 1\n",
|
||||
"mem_saved\n",
|
||||
"EP Start\n",
|
||||
"nowMemNum 20491\n",
|
||||
"lastMemCheckPoint 1\n",
|
||||
"mem_saved\n",
|
||||
"EP Start\n",
|
||||
"nowMemNum 20679\n",
|
||||
"lastMemCheckPoint 1\n",
|
||||
"mem_saved\n",
|
||||
"EP Start\n",
|
||||
"nowMemNum 20877\n",
|
||||
"lastMemCheckPoint 1\n",
|
||||
"mem_saved\n",
|
||||
"EP Start\n",
|
||||
"nowMemNum 21070\n",
|
||||
"lastMemCheckPoint 1\n",
|
||||
"mem_saved\n",
|
||||
"EP Start\n",
|
||||
"nowMemNum 21305\n",
|
||||
"lastMemCheckPoint 1\n",
|
||||
"mem_saved\n",
|
||||
"EP Start\n",
|
||||
"nowMemNum 21519\n",
|
||||
"lastMemCheckPoint 1\n",
|
||||
"mem_saved\n",
|
||||
"EP Start\n",
|
||||
"nowMemNum 21760\n",
|
||||
"lastMemCheckPoint 1\n",
|
||||
"mem_saved\n",
|
||||
"EP Start\n",
|
||||
"nowMemNum 21936\n",
|
||||
"lastMemCheckPoint 1\n",
|
||||
"mem_saved\n",
|
||||
"EP Start\n",
|
||||
"nowMemNum 22135\n",
|
||||
"lastMemCheckPoint 1\n",
|
||||
"mem_saved\n",
|
||||
"EP Start\n",
|
||||
"nowMemNum 22304\n",
|
||||
"lastMemCheckPoint 1\n",
|
||||
"mem_saved\n",
|
||||
"EP Start\n",
|
||||
"nowMemNum 22512\n",
|
||||
"lastMemCheckPoint 1\n",
|
||||
"mem_saved\n",
|
||||
"EP Start\n",
|
||||
"nowMemNum 22706\n",
|
||||
"lastMemCheckPoint 1\n",
|
||||
"mem_saved\n",
|
||||
"EP Start\n",
|
||||
"nowMemNum 22882\n",
|
||||
"lastMemCheckPoint 1\n",
|
||||
"mem_saved\n",
|
||||
"EP Start\n",
|
||||
"nowMemNum 23123\n",
|
||||
"lastMemCheckPoint 1\n",
|
||||
"mem_saved\n",
|
||||
"EP Start\n",
|
||||
"nowMemNum 23290\n",
|
||||
"lastMemCheckPoint 1\n",
|
||||
"mem_saved\n",
|
||||
"EP Start\n",
|
||||
"nowMemNum 23453\n",
|
||||
"lastMemCheckPoint 1\n",
|
||||
"mem_saved\n",
|
||||
"EP Start\n",
|
||||
"nowMemNum 23707\n",
|
||||
"lastMemCheckPoint 1\n",
|
||||
"mem_saved\n",
|
||||
"EP Start\n",
|
||||
"nowMemNum 23942\n",
|
||||
"lastMemCheckPoint 1\n",
|
||||
"mem_saved\n",
|
||||
"EP Start\n",
|
||||
"nowMemNum 24153\n",
|
||||
"lastMemCheckPoint 1\n",
|
||||
"mem_saved\n",
|
||||
"EP Start\n",
|
||||
"nowMemNum 24346\n",
|
||||
"lastMemCheckPoint 1\n",
|
||||
"mem_saved\n",
|
||||
"EP Start\n",
|
||||
"nowMemNum 24573\n",
|
||||
"lastMemCheckPoint 1\n",
|
||||
"mem_saved\n",
|
||||
"EP Start\n",
|
||||
"nowMemNum 24757\n",
|
||||
"lastMemCheckPoint 1\n",
|
||||
"mem_saved\n",
|
||||
"EP Start\n",
|
||||
"nowMemNum 24957\n",
|
||||
"lastMemCheckPoint 1\n",
|
||||
"mem_saved\n",
|
||||
"EP Start\n"
|
||||
]
|
||||
},
{
"ename": "UnityCommunicatorStoppedException",
"evalue": "Communicator has exited.",
@@ -41,8 +561,8 @@
"traceback": [
"---------------------------------------------------------------------------",
"UnityCommunicatorStoppedException        Traceback (most recent call last)",
"~\\AppData\\Local\\Temp/ipykernel_37248/645561173.py in <module>\n      5 while not done:\n      6     actions = demoAct.getHumanActions()\n----> 7     env.step(actions=actions)\n",
"c:\\Users\\UCUNI\\OneDrive\\Unity\\ML-Agents\\Aimbot-PPO\\Aimbot-PPO-Python\\aimBotEnv.py in step(self, actions, behaviorName, trackedAgent)\n     72     # take action to env\n     73     self.env.set_actions(behavior_name=behaviorName, action=thisActionTuple)\n---> 74     self.env.step()\n     75     # get nextState & reward & done after this action\n     76     nextState, reward, done, loadDir, saveNow = self.getSteps(behaviorName, trackedAgent)\n",
"~\\AppData\\Local\\Temp/ipykernel_19308/2258777724.py in <module>\n      7 while not done:\n      8     actions = demoAct.getHumanActions()\n----> 9     nextState, _, done, _, _ = env.step(actions=actions)\n     10     demoMem.saveMems(state=state, actorProb=None, action=actions, reward=None, done=None)\n     11     state = nextState\n",
"c:\\Users\\UCUNI\\OneDrive\\Unity\\ML-Agents\\Aimbot-PPO\\Aimbot-PPO-Python\\aimBotEnv.py in step(self, actions, behaviorName, trackedAgent)\n     86     # take action to env\n     87     self.env.set_actions(behavior_name=behaviorName, action=thisActionTuple)\n---> 88     self.env.step()\n     89     # get nextState & reward & done after this action\n     90     nextState, reward, done, loadDir, saveNow = self.getSteps(behaviorName, trackedAgent)\n",
"c:\\Users\\UCUNI\\AppData\\Local\\Programs\\Python\\Python39\\lib\\site-packages\\mlagents_envs\\timers.py in wrapped(*args, **kwargs)\n    303     def wrapped(*args, **kwargs):\n    304         with hierarchical_timer(func.__qualname__):\n--> 305             return func(*args, **kwargs)\n    306 \n    307     return wrapped  # type: ignore\n",
"c:\\Users\\UCUNI\\AppData\\Local\\Programs\\Python\\Python39\\lib\\site-packages\\mlagents_envs\\environment.py in step(self)\n    333         outputs = self._communicator.exchange(step_input, self._poll_process)\n    334         if outputs is None:\n--> 335             raise UnityCommunicatorStoppedException(\"Communicator has exited.\")\n    336         self._update_behavior_specs(outputs)\n    337         rl_output = outputs.rl_output\n",
"UnityCommunicatorStoppedException: Communicator has exited."
@@ -50,13 +570,26 @@
}
],
"source": [
"done = False\n",
"env.reset()\n",
"demoAct = HumanActions(mouseDiscount=MOUSEDISCOUNT)\n",
"gailExpertDataDir = \"GAIL-Expert-Data/\"\n",
"state, _, _, _, _ = env.reset()\n",
"\n",
"for ep in range(MAX_EP):\n",
"    print(\"EP Start\")\n",
"    done = False\n",
"    while not done:\n",
"        actions = demoAct.getHumanActions()\n",
"        env.step(actions=actions)\n"
"        nextState, _, done, _, _ = env.step(actions=actions)\n",
"        demoMem.saveMems(state=state, actorProb=None, action=actions, reward=None, done=None)\n",
"        state = nextState\n",
"    nowMemNum = demoMem.memNum\n",
"    saveSteps = 500\n",
"    lastMemCheckPoint = 0\n",
"    if nowMemNum / saveSteps >= lastMemCheckPoint + 1:\n",
"        lastMemCheckPoint += 1\n",
"        print(\"nowMemNum\", nowMemNum)\n",
"        print(\"lastMemCheckPoint\", lastMemCheckPoint)\n",
"        demoMem.saveMemtoFile(gailExpertDataDir)\n",
"        print(\"mem_saved\")\n"
]
}
],
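
One detail worth noting in the cell above: saveSteps and lastMemCheckPoint are re-initialized on every episode, so the checkpoint counter never advances past 1 and, once memNum reaches 500, the whole buffer is re-saved after every episode — exactly what the repeated "lastMemCheckPoint 1" lines in the output show. Hoisting the two variables above the episode loop would give the intended save-every-saveSteps behaviour; a sketch of that fix:

saveSteps = 500
lastMemCheckPoint = 0  # persists across episodes
for ep in range(MAX_EP):
    print("EP Start")
    done = False
    while not done:
        actions = demoAct.getHumanActions()
        nextState, _, done, _, _ = env.step(actions=actions)
        demoMem.saveMems(state=state, actorProb=None, action=actions, reward=None, done=None)
        state = nextState
    # now saves once per saveSteps new memories, not once per episode
    if demoMem.memNum / saveSteps >= lastMemCheckPoint + 1:
        lastMemCheckPoint += 1
        demoMem.saveMemtoFile(gailExpertDataDir)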
170
Aimbot-PPO-Python/GAIL.py
Normal file
@@ -0,0 +1,170 @@
import tensorflow as tf
import numpy as np
from numpy import ndarray

from PPO import PPO
from tensorflow import keras
from tensorflow.keras import layers
from tensorflow.keras import optimizers

from GAILConfig import GAILConfig

EPS = 1e-8


class GAIL(object):
    def __init__(
        self,
        stateSize: int,
        disActShape: list,
        conActSize: int,
        conActRange: float,
        gailConfig: GAILConfig,
    ):
        self.stateSize = stateSize
        self.disActShape = disActShape
        self.disActSize = len(disActShape)
        self.conActSize = conActSize
        self.conActRange = conActRange

        self.totalActSize = self.disActSize + conActSize
        self.discrimInputSize = stateSize + self.totalActSize
        self.discriminatorNNShape = gailConfig.discrimNNShape
        self.discrimLR = gailConfig.discrimLR
        self.discrimTrainEpochs = gailConfig.discrimTrainEpochs
        self.ppoConfig = gailConfig.ppoConfig

        self.ppo = PPO(stateSize, disActShape, conActSize, conActRange, self.ppoConfig)
        self.discriminator = self.buildDiscriminatorNet(True)

    def buildDiscriminatorNet(self, compileModel: bool):
        # -----------Input Layers-----------
        stateInput = layers.Input(shape=(self.discrimInputSize,), name="stateInput")

        # -------Intermediate layers--------
        interLayers = []
        interLayersIndex = 0
        for neuralUnit in self.discriminatorNNShape:
            thisLayerName = "dense" + str(interLayersIndex)
            if interLayersIndex == 0:
                interLayers.append(
                    layers.Dense(neuralUnit, activation="relu", name=thisLayerName)(stateInput)
                )
            else:
                interLayers.append(
                    layers.Dense(neuralUnit, activation="relu", name=thisLayerName)(interLayers[-1])
                )
            interLayersIndex += 1

        # ----------Output Layers-----------
        output = layers.Dense(1, activation="sigmoid")(interLayers[-1])

        # ----------Model Compile-----------
        model = keras.Model(inputs=stateInput, outputs=output)
        if compileModel:
            criticOPT = optimizers.Adam(learning_rate=self.discrimLR)
            model.compile(optimizer=criticOPT, loss=self.discrimLoss())
        return model

    def discrimLoss(self):
        def loss(y_true, y_pred):
            """discriminator loss function

            Args:
                y_true (tf.constant): demo trajectory
                y_pred (tf.constant): agent trajectory predict value

            Returns:
                tf.constant: discriminator loss (agent term + demo term)
            """
            demoP = self.discriminator(y_true)
            agentLoss = tf.negative(tf.reduce_mean(tf.math.log(1.0 - y_pred + EPS)))
            demoLoss = tf.negative(tf.reduce_mean(tf.math.log(demoP + EPS)))
            loss = agentLoss + demoLoss
            return loss

        return loss

    def inference(self, states: ndarray, actions: ndarray):
        """discriminator predict result

        Args:
            states (ndarray): states
            actions (ndarray): actions

        Returns:
            tf.constant: discriminator predict result
        """
        # check dimension
        if states.ndim != 2:
            stateNum = int(len(states) / self.stateSize)
            states = states.reshape([stateNum, self.stateSize])
        if actions.ndim != 2:
            actionsNum = int(len(actions) / self.totalActSize)
            actions = actions.reshape([actionsNum, self.totalActSize])

        thisTrajectory = tf.concat([states, actions], axis=1)
        discrimPredict = self.discriminator(thisTrajectory)
        return discrimPredict

    def discriminatorACC(
        self, demoStates: ndarray, demoActions: ndarray, agentStates: ndarray, agentActions: ndarray
    ):
        demoAcc = np.mean(self.inference(demoStates, demoActions))
        agentAcc = np.mean(self.inference(agentStates, agentActions))
        return demoAcc, agentAcc

    def trainDiscriminator(
        self,
        demoStates: ndarray,
        demoActions: ndarray,
        agentStates: ndarray,
        agentActions: ndarray,
        epochs: int = None,
    ):
        """train Discriminator

        Args:
            demoStates (ndarray): expert states
            demoActions (ndarray): expert actions
            agentStates (ndarray): agent PPO generated states
            agentActions (ndarray): agent PPO generated actions
            epochs (int): number of training epochs

        Returns:
            tuple: (losses list, demo accuracy, agent accuracy)
        """
        if epochs is None:
            epochs = self.discrimTrainEpochs
        demoTrajectory = tf.concat([demoStates, demoActions], axis=1)
        agentTrajectory = tf.concat([agentStates, agentActions], axis=1)
        his = self.discriminator.fit(x=agentTrajectory, y=demoTrajectory, epochs=epochs, verbose=0)

        demoAcc = np.mean(self.inference(demoStates, demoActions))
        agentAcc = np.mean(self.inference(agentStates, agentActions))
        return his.history["loss"], demoAcc, 1 - agentAcc

    def getActions(self, state: ndarray):
        actions, predictResult = self.ppo.chooseAction(state)
        return actions, predictResult

    def trainPPO(
        self,
        states: ndarray,
        oldActorResult: ndarray,
        actions: ndarray,
        newRewards: ndarray,
        dones: ndarray,
        nextState: ndarray,
        epochs: int = None,
    ):
        criticV = self.ppo.getCriticV(states)
        discountedR = self.ppo.discountReward(nextState, criticV, dones, newRewards)
        advantage = self.ppo.getGAE(discountedR, criticV)
        criticLosses = self.ppo.trainCritic(states, discountedR, epochs)
        actorLosses = self.ppo.trainActor(states, oldActorResult, actions, advantage, epochs)
        return actorLosses, criticLosses

    def generateAction(self, states: ndarray):
        act, actorP = self.ppo.chooseAction(states)
        return act, actorP
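
A minimal sketch of driving the discriminator side of this class (shapes and sizes here are assumptions; the state and action arrays must have matching row counts because trainDiscriminator concatenates them along axis 1):

import numpy as np
from GAIL import GAIL
from GAILConfig import GAILConfig

STATE_SIZE = 90  # assumed observation size
gail = GAIL(STATE_SIZE, [3, 3, 2], 1, 10.0, GAILConfig())

batch = 64  # assumed; demo and agent batches must be the same size
demoS = np.random.rand(batch, STATE_SIZE)
demoA = np.random.rand(batch, gail.totalActSize)
agentS = np.random.rand(batch, STATE_SIZE)
agentA = np.random.rand(batch, gail.totalActSize)

losses, demoAcc, agentAcc = gail.trainDiscriminator(demoS, demoA, agentS, agentA)

# D(s, a) can then be turned into a surrogate reward for trainPPO,
# e.g. something like -log(1 - D) per the usual GAIL convention
d = gail.inference(agentS, agentA)

Note the unusual fit() call: the Keras target y is repurposed to carry the demo trajectory batch, which the loss closure feeds back through the discriminator itself; that design is why both batches must be equally long.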
24
Aimbot-PPO-Python/GAILConfig.py
Normal file
@@ -0,0 +1,24 @@
import datetime
from typing import NamedTuple

from PPOConfig import PPOConfig


class GAILConfig(NamedTuple):
    discrimNNShape: list = [128, 64]
    discrimLR: float = 1e-3
    discrimTrainEpochs: int = 8
    discrimSaveDir: str = "GAIL-Model/" + datetime.datetime.now().strftime("%m%d-%H%M") + "/"

    ppoConfig: PPOConfig = PPOConfig(
        NNShape=[128, 64],
        actorLR=2e-3,
        criticLR=2e-3,
        gamma=0.99,
        lmbda=0.95,
        clipRange=0.20,
        entropyWeight=1e-2,
        trainEpochs=8,
        saveDir="GAIL-Model/" + datetime.datetime.now().strftime("%m%d-%H%M") + "/",
        loadModelDir=None,
    )
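
Because GAILConfig is a NamedTuple, single fields can be overridden without restating the rest (the values here are illustrative):

from GAILConfig import GAILConfig

config = GAILConfig()._replace(discrimLR=5e-4, discrimTrainEpochs=4)
print(config.discrimLR)  # 0.0005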
175
Aimbot-PPO-Python/GAILMem.py
Normal file
@@ -0,0 +1,175 @@
import os
import random
import numpy as np


class GAILMem(object):
    def __init__(self):
        self.states = []
        self.actorProbs = []
        self.actions = []
        self.rewards = []
        self.dones = []
        self.memNum = 0
        print("√√√√√Buffer Initialized Success√√√√√")

    def clearMem(self):
        """clear memories"""
        self.states = []
        self.actorProbs = []
        self.actions = []
        self.rewards = []
        self.dones = []
        self.memNum = 0

    def saveMemtoFile(self, dir: str):
        """save memories as ndarrays to an npz file

        Args:
            dir (str): save directory, like "GAIL-Expert-Data/", ending with "/"
        """
        statesNP = np.asarray(self.states)
        actorProbsNP = np.asarray(self.actorProbs)
        actionsNP = np.asarray(self.actions)
        rewardsNP = np.asarray(self.rewards)
        donesNP = np.asarray(self.dones)
        thisSaveDir = dir + "pack-" + str(self.memNum)
        try:
            np.savez(
                thisSaveDir,
                states=statesNP,
                actorProbs=actorProbsNP,
                actions=actionsNP,
                rewards=rewardsNP,
                dones=donesNP,
            )
        except FileNotFoundError:
            os.mkdir(dir)
            np.savez(
                thisSaveDir,
                states=statesNP,
                actorProbs=actorProbsNP,
                actions=actionsNP,
                rewards=rewardsNP,
                dones=donesNP,
            )

    def loadMemFile(self, dir: str):
        """load memories from an npz file

        Args:
            dir (str): file path
        """
        self.clearMem()
        memFile = np.load(dir)
        self.states = memFile["states"].tolist()
        self.actorProbs = memFile["actorProbs"].tolist()
        self.actions = memFile["actions"].tolist()
        self.rewards = memFile["rewards"].tolist()
        self.dones = memFile["dones"].tolist()
        self.memNum = len(self.states)

    def getRandomSample(self, sampleNum: int = 0):
        """get a random unique sample set.

        Args:
            sampleNum (int, optional): sample number; 0 returns all samples. Defaults to 0.

        Returns:
            tuple: (states, actorProbs, actions, rewards, dones)
        """
        if sampleNum == 0:
            return (
                self.getStates(),
                self.getActorProbs(),
                self.getActions(),
                self.getRewards(),
                self.getDones(),
            )
        else:
            randIndex = random.sample(range(0, self.memNum), sampleNum)
            return (
                self.standDims(np.asarray(self.states)[randIndex]),
                self.standDims(np.asarray(self.actorProbs)[randIndex]),
                self.standDims(np.asarray(self.actions)[randIndex]),
                self.standDims(np.asarray(self.rewards)[randIndex]),
                self.standDims(np.asarray(self.dones)[randIndex]),
            )

    def getStates(self):
        """get all States data as ndarray

        Returns:
            ndarray: ndarray type States data
        """
        return self.standDims(np.asarray(self.states))

    def getActorProbs(self):
        """get all ActorProbs data as ndarray

        Returns:
            ndarray: ndarray type ActorProbs data
        """
        return self.standDims(np.asarray(self.actorProbs))

    def getActions(self):
        """get all Actions data as ndarray

        Returns:
            ndarray: ndarray type Actions data
        """
        return self.standDims(np.asarray(self.actions))

    def getRewards(self):
        """get all Rewards data as ndarray

        Returns:
            ndarray: ndarray type Rewards data
        """
        return self.standDims(np.asarray(self.rewards))

    def getDones(self):
        """get all Dones data as ndarray

        Returns:
            ndarray: ndarray type Dones data
        """
        return self.standDims(np.asarray(self.dones))

    def standDims(self, data):
        """standardize data's dimensions

        Args:
            data (list): data list

        Returns:
            ndarray: ndarray type data
        """
        if np.ndim(data) > 2:
            return np.squeeze(data, axis=1)
        elif np.ndim(data) < 2:
            return np.expand_dims(data, axis=1)
        else:
            return np.asarray(data)

    def saveMems(self, state, actorProb, action, reward, done):
        """save one step's memories

        Args:
            state (_type_): states
            actorProb (_type_): actor predict result
            action (_type_): actor chosen action
            reward (_type_): reward
            done (_type_): done flag
        """
        self.states.append(state)
        self.actorProbs.append(actorProb)
        self.actions.append(action)
        self.rewards.append(reward)
        self.dones.append(done)
        self.memNum += 1
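
A short round-trip sketch for the buffer (the filename follows the "pack-" + memNum convention of saveMemtoFile plus the .npz suffix that np.savez appends; the actual number depends on when the save ran, and for human demos the actorProbs/rewards/dones slots were recorded as None):

from GAILMem import GAILMem

mem = GAILMem()
mem.loadMemFile("GAIL-Expert-Data/pack-500.npz")  # assumed pack file
states, actorProbs, actions, rewards, dones = mem.getRandomSample(sampleNum=64)
print(states.shape)  # (64, stateSize) after standDims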
@@ -1,5 +1,6 @@
import keyboard
import mouse
import math


class HumanActions:
@@ -14,11 +15,13 @@ class HumanActions:
        self.screenW = screenW
        self.screenH = screenH
        self.MOUSEDISCOUNT = mouseDiscount
        self.mouseSmooth = 5
        self.mouseMax = 10

    def getHumanActions(self):
        x, _ = mouse.get_position()
        xMovement = (x - self.screenW / 2) / self.MOUSEDISCOUNT

        xMovement = self.smoothMouseMovement(xMovement)
        ws = 0
        ad = 0
        click = 0
@@ -42,10 +45,14 @@ class HumanActions:
        elif keyboard.is_pressed("s+a"):
            ws = 2
            ad = 2
        if mouse.is_pressed(button="left"):
        if keyboard.is_pressed("0"):
            click = 1

        actions = [ws, ad, click, [xMovement]]

        mouse.move(self.screenW / 2, self.screenH / 2)
        return actions

    def smoothMouseMovement(self, x: float):
        out = (1 / (1 + math.exp(-x / self.mouseSmooth)) - 1 / 2) * self.mouseMax * 2
        return out
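
smoothMouseMovement is a scaled logistic, out = (σ(x / mouseSmooth) − 1/2) · 2 · mouseMax: odd around zero, approximately linear for small movements (slope mouseMax / (2 · mouseSmooth) = 1 with the values above), and saturating at ±mouseMax. A standalone check of those properties:

import math

def smooth(x, mouseSmooth=5, mouseMax=10):
    # scaled logistic: ~linear near 0, saturates at +/- mouseMax
    return (1 / (1 + math.exp(-x / mouseSmooth)) - 1 / 2) * mouseMax * 2

print(smooth(0))            # 0.0
print(round(smooth(5), 2))  # ~4.62, already sub-linear
print(smooth(1000))         # ~10.0, capped at mouseMax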
@@ -6,7 +6,14 @@ from numpy import ndarray


class makeEnv(object):
    def __init__(self, envPath, workerID, basePort):
    def __init__(
        self,
        envPath: str,
        workerID: int = 1,
        basePort: int = 100,
        stackSize: int = 1,
        stackIntercal: int = 0,
    ):
        self.env = UnityEnvironment(
            file_name=envPath,
            seed=1,
@@ -25,24 +32,31 @@ class makeEnv(object):
        self.OBSERVATION_SPECS = self.SPEC.observation_specs[0]  # observation spec
        self.ACTION_SPEC = self.SPEC.action_spec  # action specs

        self.DISCRETE_SIZE = self.ACTION_SPEC.discrete_size  # size of discrete actions
        self.DISCRETE_SIZE = self.ACTION_SPEC.discrete_size
        self.DISCRETE_SHAPE = list(self.ACTION_SPEC.discrete_branches)
        self.CONTINUOUS_SIZE = self.ACTION_SPEC.continuous_size  # size of continuous actions
        self.STATE_SIZE = self.OBSERVATION_SPECS.shape[0] - self.LOAD_DIR_SIZE_IN_STATE  # number of environment observation values
        self.CONTINUOUS_SIZE = self.ACTION_SPEC.continuous_size
        self.SINGLE_STATE_SIZE = self.OBSERVATION_SPECS.shape[0] - self.LOAD_DIR_SIZE_IN_STATE
        self.STATE_SIZE = self.SINGLE_STATE_SIZE * stackSize

        # stacked State
        self.STACK_SIZE = stackSize
        self.STATE_BUFFER_SIZE = stackSize + ((stackSize - 1) * stackIntercal)
        self.STACK_INDEX = list(range(0, self.STATE_BUFFER_SIZE, stackIntercal + 1))
        self.statesBuffer = np.array([[0.0] * self.SINGLE_STATE_SIZE] * self.STATE_BUFFER_SIZE)
        print("√√√√√Enviroment Initialized Success√√√√√")

    def step(
        self,
        actions: list,
        behaviorName: ndarray = None,
        trackedAgent: ndarray = None,
        trackedAgent: int = None,
    ):
        """convert the actions list to an ActionTuple, then send it to the environment

        Args:
            actions (list): PPO chooseAction output action list
            behaviorName (ndarray, optional): behaviorName. Defaults to None.
            trackedAgent (ndarray, optional): trackedAgentID. Defaults to None.
            trackedAgent (int, optional): trackedAgentID. Defaults to None.

        Returns:
            ndarray: nextState, reward, done, loadDir, saveNow
@@ -54,13 +68,13 @@ class makeEnv(object):
            discreteActions = np.asarray([[0]])
        else:
            # create discrete action from actions list
            discreteActions = np.asanyarray([actions[0:self.DISCRETE_SIZE]])
            discreteActions = np.asanyarray([actions[0 : self.DISCRETE_SIZE]])
        if self.CONTINUOUS_SIZE == 0:
            # create empty continuous action
            continuousActions = np.asanyarray([[0.0]])
        else:
            # create continuous actions from actions list
            continuousActions = np.asanyarray(actions[self.DISCRETE_SIZE:])
            continuousActions = np.asanyarray(actions[self.DISCRETE_SIZE :])

        if behaviorName is None:
            behaviorName = self.BEHA_NAME
@@ -98,21 +112,28 @@ class makeEnv(object):

        if trackedAgent in decisionSteps:  # if the episode has not ended, the state is stored in decision_steps
            nextState = decisionSteps[trackedAgent].obs[0]
            nextState = np.reshape(nextState, [1, self.STATE_SIZE + self.LOAD_DIR_SIZE_IN_STATE])
            saveNow = nextState[0][-1]
            loadDir = nextState[0][-3:-1]
            nextState = nextState[0][:-3]
            nextState = np.reshape(
                nextState, [self.SINGLE_STATE_SIZE + self.LOAD_DIR_SIZE_IN_STATE]
            )
            saveNow = nextState[-1]
            loadDir = nextState[-3:-1]
            nextState = nextState[:-3]
            reward = decisionSteps[trackedAgent].reward
            done = False
        if trackedAgent in terminalSteps:  # if the episode has ended, the state is stored in terminal_steps
            nextState = terminalSteps[trackedAgent].obs[0]
            nextState = np.reshape(nextState, [1, self.STATE_SIZE + self.LOAD_DIR_SIZE_IN_STATE])
            saveNow = nextState[0][-1]
            loadDir = nextState[0][-3:-1]
            nextState = nextState[0][:-3]
            nextState = np.reshape(
                nextState, [self.SINGLE_STATE_SIZE + self.LOAD_DIR_SIZE_IN_STATE]
            )
            saveNow = nextState[-1]
            loadDir = nextState[-3:-1]
            nextState = nextState[:-3]
            reward = terminalSteps[trackedAgent].reward
            done = True
        return nextState, reward, done, loadDir, saveNow

        # stack state
        stackedStates = self.stackStates(nextState)
        return stackedStates, reward, done, loadDir, saveNow

    def reset(self):
        """reset the environment and get observations
@@ -120,11 +141,21 @@
        Returns:
            ndarray: nextState, reward, done, loadDir, saveNow
        """
        # reset buffer
        self.statesBuffer = np.array([[0.0] * self.SINGLE_STATE_SIZE] * self.STATE_BUFFER_SIZE)
        # reset env
        self.env.reset()
        nextState, reward, done, loadDir, saveNow = self.getSteps()
        return nextState, reward, done, loadDir, saveNow

    def stackStates(self, state):
        # shift the buffer and append the newest state
        self.statesBuffer[0:-1] = self.statesBuffer[1:]
        self.statesBuffer[-1] = state

        # return stacked states
        return self.statesBuffer[self.STACK_INDEX]

    def render(self):
        """render environment
        """
        """render environment"""
        self.env.render()
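
With the notebook's settings (stackSize=3, stackIntercal=29), the buffer keeps STATE_BUFFER_SIZE = 3 + 2 · 29 = 61 single states and STACK_INDEX = [0, 30, 60], so each observation handed to the agent is the newest state plus snapshots from 30 and 60 steps earlier. The arithmetic, checked standalone:

stackSize, stackIntercal = 3, 29
bufferSize = stackSize + (stackSize - 1) * stackIntercal
stackIndex = list(range(0, bufferSize, stackIntercal + 1))
print(bufferSize)  # 61
print(stackIndex)  # [0, 30, 60]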
@@ -361,17 +361,102 @@
},
{
"cell_type": "code",
"execution_count": null,
"execution_count": 21,
"metadata": {},
"outputs": [],
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"3\n",
"deque([[0.0, 0.0, 0.0, 0.0, 0.0], [0.0, 0.0, 0.0, 0.0, 0.0], [1, 1, 1, 1, 1]], maxlen=3)\n",
"3\n",
"deque([[0.0, 0.0, 0.0, 0.0, 0.0], [1, 1, 1, 1, 1], [2, 2, 2, 2, 2]], maxlen=3)\n",
"3\n",
"deque([[1, 1, 1, 1, 1], [2, 2, 2, 2, 2], [3, 3, 3, 3, 3]], maxlen=3)\n",
"3\n",
"deque([[2, 2, 2, 2, 2], [3, 3, 3, 3, 3], [4, 4, 4, 4, 4]], maxlen=3)\n",
"3\n",
"deque([[3, 3, 3, 3, 3], [4, 4, 4, 4, 4], [5, 5, 5, 5, 5]], maxlen=3)\n",
"3\n",
"deque([[4, 4, 4, 4, 4], [5, 5, 5, 5, 5], [6, 6, 6, 6, 6]], maxlen=3)\n",
"3\n",
"deque([[5, 5, 5, 5, 5], [6, 6, 6, 6, 6], [7, 7, 7, 7, 7]], maxlen=3)\n",
"3\n",
"deque([[6, 6, 6, 6, 6], [7, 7, 7, 7, 7], [8, 8, 8, 8, 8]], maxlen=3)\n",
"3\n",
"deque([[7, 7, 7, 7, 7], [8, 8, 8, 8, 8], [9, 9, 9, 9, 9]], maxlen=3)\n"
]
}
],
"source": [
"import keyboard\n",
"from collections import deque\n",
"import numpy as np\n",
"\n",
"while True:\n",
"    if keyboard.is_pressed(\"w\"):\n",
"        print(\"w\")\n",
"    elif keyboard.is_pressed(\"s\"):\n",
"        print(\"s\")"
"maxBuffer = 3\n",
"stateSize = 5\n",
"\n",
"aa = deque([[0.0]*stateSize],maxlen=maxBuffer)\n",
"\n",
"def ss(s):\n",
"    aa.append(s)\n",
"    if len(aa) < maxBuffer:\n",
"        for i in range(maxBuffer - len(aa)):\n",
"            aa.appendleft([0.0] * stateSize)\n",
"\n",
"for i in range(1,10):\n",
"    ss([i,i,i,i,i])\n",
"    print(len(aa))\n",
"    print(aa)\n",
"'''\n",
"3\n",
"deque([[0.0, 0.0, 0.0, 0.0, 0.0], [0.0, 0.0, 0.0, 0.0, 0.0], [1, 1, 1, 1, 1]], maxlen=3)\n",
"3\n",
"deque([[0.0, 0.0, 0.0, 0.0, 0.0], [1, 1, 1, 1, 1], [2, 2, 2, 2, 2]], maxlen=3)\n",
"3\n",
"deque([[1, 1, 1, 1, 1], [2, 2, 2, 2, 2], [3, 3, 3, 3, 3]], maxlen=3)\n",
"3\n",
"deque([[2, 2, 2, 2, 2], [3, 3, 3, 3, 3], [4, 4, 4, 4, 4]], maxlen=3)\n",
"3\n",
"deque([[3, 3, 3, 3, 3], [4, 4, 4, 4, 4], [5, 5, 5, 5, 5]], maxlen=3)\n",
"3\n",
"deque([[4, 4, 4, 4, 4], [5, 5, 5, 5, 5], [6, 6, 6, 6, 6]], maxlen=3)\n",
"3\n",
"deque([[5, 5, 5, 5, 5], [6, 6, 6, 6, 6], [7, 7, 7, 7, 7]], maxlen=3)\n",
"3\n",
"deque([[6, 6, 6, 6, 6], [7, 7, 7, 7, 7], [8, 8, 8, 8, 8]], maxlen=3)\n",
"3\n",
"deque([[7, 7, 7, 7, 7], [8, 8, 8, 8, 8], [9, 9, 9, 9, 9]], maxlen=3)'''"
]
},
{
"cell_type": "code",
"execution_count": 35,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"1\n",
"[0]\n"
]
}
],
"source": [
"from collections import deque\n",
"import numpy as np\n",
"\n",
"ss = 1\n",
"si = 0\n",
"buffersize = ss + ((ss-1)*si)\n",
"print(buffersize)\n",
"stackedStates = deque([[0.0] * 10]*6, maxlen=3)\n",
"stackedStates.append([1.0]*10)\n",
"ssnp = stackedStates\n",
"\n",
"aa = list(range(0,buffersize,si+1))\n",
"print(aa)"
]
}
],