Aimbot-PPO/Aimbot-PPO-Python/Pytorch/testarea.ipynb
Koha9 4b8ffeac6d GAIL V0.1 save point
GAIL V0.1 save point
TODO:
1. Debug human action recording (GAILMem)
2. Debug GAIL
2022-12-04 08:42:10 +09:00

848 lines
44 KiB
Plaintext

{
"cells": [
{
"cell_type": "code",
"execution_count": 1,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Action, 1 continuous ctrl 2.1\n",
"Action, 0 continuous ctrl -1.1\n"
]
}
],
"source": [
"import gym\n",
"from gym.spaces import Dict, Discrete, Box, Tuple\n",
"import numpy as np\n",
"\n",
"\n",
"class SampleGym(gym.Env):\n",
"    \"\"\"Toy env with a hybrid (Discrete, Box) action space for quick API checks.\"\"\"\n",
"\n",
"    def __init__(self, config=None):\n",
"        # BUG FIX: `config={}` is a mutable default shared across every instance;\n",
"        # use None and create a fresh dict per instance instead.\n",
"        self.config = {} if config is None else config\n",
"        self.action_space = Tuple((Discrete(2), Box(-10, 10, (2,))))\n",
"        self.observation_space = Box(-10, 10, (2, 2))\n",
"        # Read from self.config so the None-default path works too.\n",
"        self.p_done = self.config.get(\"p_done\", 0.1)\n",
"\n",
"    def reset(self):\n",
"        return self.observation_space.sample()\n",
"\n",
"    def step(self, action):\n",
"        # action is (discrete_choice, [continuous values]); the chosen discrete\n",
"        # branch selects which continuous control is used.\n",
"        chosen_action = action[0]\n",
"        cnt_control = action[1][chosen_action]\n",
"\n",
"        if chosen_action == 0:\n",
"            reward = cnt_control\n",
"        else:\n",
"            reward = -cnt_control - 1\n",
"\n",
"        print(f\"Action, {chosen_action} continuous ctrl {cnt_control}\")\n",
"        return (\n",
"            self.observation_space.sample(),\n",
"            reward,\n",
"            bool(np.random.choice([True, False], p=[self.p_done, 1.0 - self.p_done])),\n",
"            {},\n",
"        )\n",
"\n",
"\n",
"if __name__ == \"__main__\":\n",
"    env = SampleGym()\n",
"    env.reset()\n",
"    env.step((1, [-1, 2.1]))  # should say use action 1 with 2.1\n",
"    env.step((0, [-1.1, 2.1]))  # should say use action 0 with -1.1"
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {},
"outputs": [],
"source": [
"from mlagents_envs.environment import UnityEnvironment\n",
"from gym_unity.envs import UnityToGymWrapper\n",
"import numpy as np\n",
"\n",
"ENV_PATH = \"../Build-ParallelEnv/Aimbot-ParallelEnv\"\n",
"WORKER_ID = 1\n",
"BASE_PORT = 2002\n",
"\n",
"# Launch the Unity build directly through the low-level ML-Agents API.\n",
"env = UnityEnvironment(\n",
"    file_name=ENV_PATH,\n",
"    seed=1,\n",
"    side_channels=[],\n",
"    worker_id=WORKER_ID,\n",
"    base_port=BASE_PORT,\n",
")\n",
"\n",
"trackedAgent = 0\n",
"env.reset()\n",
"BEHA_SPECS = env.behavior_specs\n",
"BEHA_NAME = list(BEHA_SPECS)[0]\n",
"SPEC = BEHA_SPECS[BEHA_NAME]\n",
"print(SPEC)\n",
"\n",
"decisionSteps, terminalSteps = env.get_steps(BEHA_NAME)\n",
"\n",
"if trackedAgent in decisionSteps:  # episode still running: state is in decision_steps\n",
"    nextState = decisionSteps[trackedAgent].obs[0]\n",
"    reward = decisionSteps[trackedAgent].reward\n",
"    done = False\n",
"if trackedAgent in terminalSteps:  # episode finished: state is in terminal_steps\n",
"    nextState = terminalSteps[trackedAgent].obs[0]\n",
"    reward = terminalSteps[trackedAgent].reward\n",
"    done = True\n",
"print(decisionSteps.agent_id)\n",
"print(terminalSteps)\n"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"decisionSteps.agent_id [1 2 5 7]\n",
"decisionSteps.agent_id_to_index {1: 0, 2: 1, 5: 2, 7: 3}\n",
"decisionSteps.reward [0. 0. 0. 0.]\n",
"decisionSteps.action_mask [array([[False, False, False],\n",
" [False, False, False],\n",
" [False, False, False],\n",
" [False, False, False]]), array([[False, False, False],\n",
" [False, False, False],\n",
" [False, False, False],\n",
" [False, False, False]]), array([[False, False],\n",
" [False, False],\n",
" [False, False],\n",
" [False, False]])]\n",
"decisionSteps.obs [ 0. 0. 0. 0. 0. 0.\n",
" 0. 0. 0. 0. 0. 0.\n",
" 0. 0. 0. 0. 0. 0.\n",
" 0. 0. 0. 0. 0. 0.\n",
" 0. 0. 0. 0. 0. 0.\n",
" 0. 0. 0. 0. 0. 0.\n",
" 0. 0. 0. 0. 0. 0.\n",
" 0. 0. 0. 0. 0. 0.\n",
" 0. 0. 0. 0. 0. 0.\n",
" 0. 0. 0. 0. 0. 0.\n",
" 0. 0. -15.994009 1. -26.322788 1.\n",
" 1. 1. 1. 1. 1. 2.\n",
" 1. 1. 1. 1. 1. 1.\n",
" 1. 1.3519633 1.6946528 2.3051548 3.673389 9.067246\n",
" 17.521473 21.727095 22.753294 24.167128 25.905216 18.35725\n",
" 21.02278 21.053417 0. ]\n"
]
},
{
"data": {
"text/plain": [
"'decisionSteps.obs [array([[-15.994009 , 1. , -26.322788 , 1. , 1. ,\\n 1. , 1. , 1. , 1. , 2. ,\\n 1. , 1. , 1. , 1. , 1. ,\\n 1. , 1. , 1.3519633, 1.6946528, 2.3051548,\\n 3.673389 , 9.067246 , 17.521473 , 21.727095 , 22.753294 ,\\n 24.167128 , 25.905216 , 18.35725 , 21.02278 , 21.053417 ,\\n 0. ],\\n [ -1.8809433, 1. , -25.66834 , 1. , 2. ,\\n 1. , 1. , 1. , 1. , 1. ,\\n 1. , 1. , 1. , 1. , 1. ,\\n 1. , 1. , 16.768637 , 23.414627 , 22.04486 ,\\n 21.050663 , 20.486784 , 20.486784 , 21.050665 , 15.049731 ,\\n 11.578419 , 9.695194 , 20.398016 , 20.368341 , 20.398016 ,\\n...\\n 20.551746 , 20.00118 , 20.001116 , 20.551594 , 21.5222 ,\\n 17.707508 , 14.86889 , 19.914494 , 19.885508 , 19.914463 ,\\n 0. ]], dtype=float32)]'"
]
},
"execution_count": 9,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"print(\"decisionSteps.agent_id\",decisionSteps.agent_id)\n",
"# decisionSteps.agent_id [1 2 5 7]\n",
"print(\"decisionSteps.agent_id_to_index\",decisionSteps.agent_id_to_index)\n",
"# decisionSteps.agent_id_to_index {1: 0, 2: 1, 5: 2, 7: 3}\n",
"print(\"decisionSteps.reward\",decisionSteps.reward)\n",
"# decisionSteps.reward [0. 0. 0. 0.]\n",
"print(\"decisionSteps.action_mask\",decisionSteps.action_mask)\n",
"'''\n",
"decisionSteps.action_mask [array([[False, False, False],\n",
" [False, False, False],\n",
" [False, False, False],\n",
" [False, False, False]]), array([[False, False, False],\n",
" [False, False, False],\n",
" [False, False, False],\n",
" [False, False, False]]), array([[False, False],\n",
" [False, False],\n",
" [False, False],\n",
" [False, False]])]\n",
"'''\n",
"print(\"decisionSteps.obs\", decisionSteps.obs[0][0])\n",
"'''decisionSteps.obs [array([[-15.994009 , 1. , -26.322788 , 1. , 1. ,\n",
" 1. , 1. , 1. , 1. , 2. ,\n",
" 1. , 1. , 1. , 1. , 1. ,\n",
" 1. , 1. , 1.3519633, 1.6946528, 2.3051548,\n",
" 3.673389 , 9.067246 , 17.521473 , 21.727095 , 22.753294 ,\n",
" 24.167128 , 25.905216 , 18.35725 , 21.02278 , 21.053417 ,\n",
" 0. ],\n",
" [ -1.8809433, 1. , -25.66834 , 1. , 2. ,\n",
" 1. , 1. , 1. , 1. , 1. ,\n",
" 1. , 1. , 1. , 1. , 1. ,\n",
" 1. , 1. , 16.768637 , 23.414627 , 22.04486 ,\n",
" 21.050663 , 20.486784 , 20.486784 , 21.050665 , 15.049731 ,\n",
" 11.578419 , 9.695194 , 20.398016 , 20.368341 , 20.398016 ,\n",
"...\n",
" 20.551746 , 20.00118 , 20.001116 , 20.551594 , 21.5222 ,\n",
" 17.707508 , 14.86889 , 19.914494 , 19.885508 , 19.914463 ,\n",
" 0. ]], dtype=float32)]'''\n"
]
},
{
"cell_type": "code",
"execution_count": 1,
"metadata": {},
"outputs": [],
"source": [
"from AimbotEnv import Aimbot\n",
"\n",
"ENV_PATH = \"../Build-ParallelEnv/Aimbot-ParallelEnv\"\n",
"WORKER_ID = 1\n",
"BASE_PORT = 2002\n",
"\n",
"# Launch the parallel Unity environment through the project's Aimbot wrapper.\n",
"env = Aimbot(envPath=ENV_PATH, workerID=WORKER_ID, basePort=BASE_PORT)\n"
]
},
{
"cell_type": "code",
"execution_count": 8,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"(array([[ 0. , 0. , 0. , 0. , 0. ,\n",
" 0. , 0. , 0. , 0. , 0. ,\n",
" 0. , 0. , 0. , 0. , 0. ,\n",
" 0. , 0. , 0. , 0. , 0. ,\n",
" 0. , 0. , 0. , 0. , 0. ,\n",
" 0. , 0. , 0. , 0. , 0. ,\n",
" 0. , -15.994009 , 1. , -26.322788 , 1. ,\n",
" 1. , 1. , 1. , 1. , 1. ,\n",
" 2. , 1. , 1. , 1. , 1. ,\n",
" 1. , 1. , 1. , 1.3519633, 1.6946528,\n",
" 2.3051548, 3.673389 , 9.067246 , 17.521473 , 21.727095 ,\n",
" 22.753294 , 24.167128 , 25.905216 , 18.35725 , 21.02278 ,\n",
" 21.053417 , 0. , -15.994003 , 1. , -26.322784 ,\n",
" 1. , 1. , 1. , 1. , 1. ,\n",
" 1. , 1. , 1. , 1. , 1. ,\n",
" 1. , 1. , 1. , 1. , 1.3519667,\n",
" 1.6946585, 2.3051722, 3.6734192, 9.067533 , 21.145092 ,\n",
" 21.727148 , 22.753365 , 24.167217 , 25.905317 , 18.358263 ,\n",
" 21.022812 , 21.053455 , 0. ],\n",
" [ 0. , 0. , 0. , 0. , 0. ,\n",
" 0. , 0. , 0. , 0. , 0. ,\n",
" 0. , 0. , 0. , 0. , 0. ,\n",
" 0. , 0. , 0. , 0. , 0. ,\n",
" 0. , 0. , 0. , 0. , 0. ,\n",
" 0. , 0. , 0. , 0. , 0. ,\n",
" 0. , -1.8809433, 1. , -25.66834 , 1. ,\n",
" 2. , 1. , 1. , 1. , 1. ,\n",
" 1. , 1. , 1. , 1. , 1. ,\n",
" 1. , 1. , 1. , 16.768637 , 23.414627 ,\n",
" 22.04486 , 21.050663 , 20.486784 , 20.486784 , 21.050665 ,\n",
" 15.049731 , 11.578419 , 9.695194 , 20.398016 , 20.368341 ,\n",
" 20.398016 , 0. , -1.8809433, 1. , -25.66834 ,\n",
" 1. , 1. , 2. , 1. , 1. ,\n",
" 1. , 1. , 1. , 1. , 2. ,\n",
" 2. , 1. , 1. , 1. , 25.098585 ,\n",
" 15.749494 , 22.044899 , 21.050697 , 20.486813 , 20.486813 ,\n",
" 21.050694 , 15.049746 , 3.872317 , 3.789325 , 20.398046 ,\n",
" 20.368372 , 20.398046 , 0. ],\n",
" [ 0. , 0. , 0. , 0. , 0. ,\n",
" 0. , 0. , 0. , 0. , 0. ,\n",
" 0. , 0. , 0. , 0. , 0. ,\n",
" 0. , 0. , 0. , 0. , 0. ,\n",
" 0. , 0. , 0. , 0. , 0. ,\n",
" 0. , 0. , 0. , 0. , 0. ,\n",
" 0. , -13.672583 , 1. , -26.479263 , 1. ,\n",
" 1. , 1. , 1. , 1. , 1. ,\n",
" 1. , 1. , 1. , 1. , 1. ,\n",
" 1. , 1. , 1. , 5.3249803, 6.401276 ,\n",
" 8.374101 , 12.8657875, 21.302414 , 21.30242 , 21.888742 ,\n",
" 22.92251 , 24.346794 , 26.09773 , 21.210114 , 21.179258 ,\n",
" 21.210117 , 0. , -13.672583 , 1. , -26.479263 ,\n",
" 1. , 1. , 1. , 1. , 1. ,\n",
" 1. , 1. , 2. , 1. , 1. ,\n",
" 2. , 1. , 1. , 2. , 5.3249855,\n",
" 6.4012837, 8.374114 , 12.865807 , 21.302446 , 21.30245 ,\n",
" 16.168503 , 22.922543 , 24.346823 , 7.1110754, 21.210148 ,\n",
" 21.17929 , 12.495141 , 0. ],\n",
" [ 0. , 0. , 0. , 0. , 0. ,\n",
" 0. , 0. , 0. , 0. , 0. ,\n",
" 0. , 0. , 0. , 0. , 0. ,\n",
" 0. , 0. , 0. , 0. , 0. ,\n",
" 0. , 0. , 0. , 0. , 0. ,\n",
" 0. , 0. , 0. , 0. , 0. ,\n",
" 0. , -4.9038744, 1. , -25.185507 , 1. ,\n",
" 1. , 1. , 1. , 1. , 1. ,\n",
" 1. , 1. , 1. , 1. , 1. ,\n",
" 1. , 1. , 1. , 20.33171 , 22.859762 ,\n",
" 21.522427 , 20.551746 , 20.00118 , 20.001116 , 20.551594 ,\n",
" 21.5222 , 17.707508 , 14.86889 , 19.914494 , 19.885508 ,\n",
" 19.914463 , 0. , -4.9038773, 1. , -25.185507 ,\n",
" 1. , 2. , 1. , 2. , 1. ,\n",
" 1. , 1. , 1. , 2. , 1. ,\n",
" 1. , 1. , 1. , 1. , 15.905993 ,\n",
" 22.85977 , 11.566693 , 20.551773 , 20.00121 , 20.001146 ,\n",
" 20.551619 , 7.135157 , 17.707582 , 14.868943 , 19.914528 ,\n",
" 19.88554 , 19.914494 , 0. ]], dtype=float32),\n",
" [[-0.05], [-0.05], [-0.05], [-0.05]],\n",
" [[False], [False], [False], [False]])"
]
},
"execution_count": 8,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"import numpy as np\n",
"\n",
"# Quick look at the wrapper's observation shape and how it composes with a\n",
"# leading batch dimension; the cell's value is the fresh reset() observation.\n",
"env.unity_observation_shape\n",
"(128, 4) + env.unity_observation_shape\n",
"env.reset()"
]
},
{
"cell_type": "code",
"execution_count": 4,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"tensor([[1, 2, 3],\n",
" [1, 2, 3],\n",
" [1, 2, 3],\n",
" [1, 2, 3]], device='cuda:0')\n",
"tensor([[1],\n",
" [2],\n",
" [3],\n",
" [4]], device='cuda:0')\n"
]
},
{
"data": {
"text/plain": [
"tensor([[1, 2, 3, 1],\n",
" [1, 2, 3, 2],\n",
" [1, 2, 3, 3],\n",
" [1, 2, 3, 4]], device='cuda:0')"
]
},
"execution_count": 4,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"import torch\n",
"\n",
"# Demo: column-concatenate a (4, 3) matrix with a (4, 1) column on the GPU.\n",
"mat = torch.tensor([[1, 2, 3], [1, 2, 3], [1, 2, 3], [1, 2, 3]]).to(\"cuda:0\")\n",
"col = torch.tensor([[1], [2], [3], [4]]).to(\"cuda:0\")\n",
"print(mat)\n",
"print(col)\n",
"torch.cat([mat, col], dim=1)"
]
},
{
"cell_type": "code",
"execution_count": 7,
"metadata": {},
"outputs": [
{
"ename": "AttributeError",
"evalue": "Can't get attribute 'PPOAgent' on <module '__main__'>",
"output_type": "error",
"traceback": [
"\u001b[1;31m---------------------------------------------------------------------------\u001b[0m",
"\u001b[1;31mAttributeError\u001b[0m Traceback (most recent call last)",
"\u001b[1;32m~\\AppData\\Local\\Temp\\ipykernel_31348\\1930153251.py\u001b[0m in \u001b[0;36m<cell line: 2>\u001b[1;34m()\u001b[0m\n\u001b[0;32m 1\u001b[0m \u001b[1;32mimport\u001b[0m \u001b[0mtorch\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m----> 2\u001b[1;33m \u001b[0mmymodel\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mtorch\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mload\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;34m\"../PPO-Model/SmallArea-256-128-hybrid.pt\"\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m 3\u001b[0m \u001b[0mmymodel\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0meval\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n",
"\u001b[1;32mc:\\Users\\UCUNI\\AppData\\Local\\Programs\\Python\\Python39\\lib\\site-packages\\torch\\serialization.py\u001b[0m in \u001b[0;36mload\u001b[1;34m(f, map_location, pickle_module, **pickle_load_args)\u001b[0m\n\u001b[0;32m 710\u001b[0m \u001b[0mopened_file\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mseek\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0morig_position\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 711\u001b[0m \u001b[1;32mreturn\u001b[0m \u001b[0mtorch\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mjit\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mload\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mopened_file\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m--> 712\u001b[1;33m \u001b[1;32mreturn\u001b[0m \u001b[0m_load\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mopened_zipfile\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mmap_location\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mpickle_module\u001b[0m\u001b[1;33m,\u001b[0m \u001b[1;33m**\u001b[0m\u001b[0mpickle_load_args\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m 713\u001b[0m \u001b[1;32mreturn\u001b[0m \u001b[0m_legacy_load\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mopened_file\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mmap_location\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mpickle_module\u001b[0m\u001b[1;33m,\u001b[0m \u001b[1;33m**\u001b[0m\u001b[0mpickle_load_args\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 714\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n",
"\u001b[1;32mc:\\Users\\UCUNI\\AppData\\Local\\Programs\\Python\\Python39\\lib\\site-packages\\torch\\serialization.py\u001b[0m in \u001b[0;36m_load\u001b[1;34m(zip_file, map_location, pickle_module, pickle_file, **pickle_load_args)\u001b[0m\n\u001b[0;32m 1047\u001b[0m \u001b[0munpickler\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mUnpicklerWrapper\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mdata_file\u001b[0m\u001b[1;33m,\u001b[0m \u001b[1;33m**\u001b[0m\u001b[0mpickle_load_args\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 1048\u001b[0m \u001b[0munpickler\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mpersistent_load\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mpersistent_load\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m-> 1049\u001b[1;33m \u001b[0mresult\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0munpickler\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mload\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m 1050\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 1051\u001b[0m \u001b[0mtorch\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0m_utils\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0m_validate_loaded_sparse_tensors\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n",
"\u001b[1;32mc:\\Users\\UCUNI\\AppData\\Local\\Programs\\Python\\Python39\\lib\\site-packages\\torch\\serialization.py\u001b[0m in \u001b[0;36mfind_class\u001b[1;34m(self, mod_name, name)\u001b[0m\n\u001b[0;32m 1040\u001b[0m \u001b[1;32mpass\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 1041\u001b[0m \u001b[0mmod_name\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mload_module_mapping\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mget\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mmod_name\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mmod_name\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m-> 1042\u001b[1;33m \u001b[1;32mreturn\u001b[0m \u001b[0msuper\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mfind_class\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mmod_name\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mname\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m 1043\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 1044\u001b[0m \u001b[1;31m# Load the data (which may in turn use `persistent_load` to load tensors)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n",
"\u001b[1;31mAttributeError\u001b[0m: Can't get attribute 'PPOAgent' on <module '__main__'>"
]
}
],
"source": [
"import torch\n",
"import torch.nn as nn\n",
"import numpy as np\n",
"from torch.distributions.normal import Normal\n",
"from torch.distributions.categorical import Categorical\n",
"\n",
"from AimbotEnv import Aimbot\n",
"\n",
"# FIX: on a fresh kernel this cell used nn/np/Normal/Categorical/Aimbot without\n",
"# importing them (only `import torch` was present) and would NameError.\n",
"\n",
"\n",
"def layer_init(layer, std=np.sqrt(2), bias_const=0.0):\n",
"    \"\"\"Orthogonal weight init + constant bias, as in the CleanRL PPO recipe.\"\"\"\n",
"    torch.nn.init.orthogonal_(layer.weight, std)\n",
"    torch.nn.init.constant_(layer.bias, bias_const)\n",
"    return layer\n",
"\n",
"\n",
"class PPOAgent(nn.Module):\n",
"    \"\"\"PPO actor-critic with a hybrid (discrete + continuous) action head.\"\"\"\n",
"\n",
"    def __init__(self, env: Aimbot):\n",
"        super(PPOAgent, self).__init__()\n",
"        self.discrete_size = env.unity_discrete_size\n",
"        self.discrete_shape = list(env.unity_discrete_branches)\n",
"        self.continuous_size = env.unity_continuous_size\n",
"\n",
"        self.network = nn.Sequential(\n",
"            layer_init(nn.Linear(np.array(env.unity_observation_shape).prod(), 256)),\n",
"            nn.ReLU(),\n",
"            layer_init(nn.Linear(256, 128)),\n",
"            nn.ReLU(),\n",
"        )\n",
"        self.actor_dis = layer_init(nn.Linear(128, self.discrete_size), std=0.01)\n",
"        self.actor_mean = layer_init(nn.Linear(128, self.continuous_size), std=0.01)\n",
"        self.actor_logstd = nn.Parameter(torch.zeros(1, self.continuous_size))\n",
"        self.critic = layer_init(nn.Linear(128, 1), std=1)\n",
"\n",
"    def get_value(self, state: torch.Tensor):\n",
"        return self.critic(self.network(state))\n",
"\n",
"    def get_actions_value(self, state: torch.Tensor, actions=None):\n",
"        hidden = self.network(state)\n",
"        # discrete: split the flat logits into one Categorical per action branch\n",
"        dis_logits = self.actor_dis(hidden)\n",
"        split_logits = torch.split(dis_logits, self.discrete_shape, dim=1)\n",
"        multi_categoricals = [Categorical(logits=thisLogits) for thisLogits in split_logits]\n",
"        # continuous: Gaussian with a state-independent learned log-std\n",
"        actions_mean = self.actor_mean(hidden)\n",
"        action_logstd = self.actor_logstd.expand_as(actions_mean)\n",
"        action_std = torch.exp(action_logstd)\n",
"        con_probs = Normal(actions_mean, action_std)\n",
"\n",
"        if actions is None:\n",
"            disAct = torch.stack([ctgr.sample() for ctgr in multi_categoricals])\n",
"            conAct = con_probs.sample()\n",
"            actions = torch.cat([disAct.T, conAct], dim=1)\n",
"        else:\n",
"            # NOTE(review): relies on the notebook-global `env` — TODO store\n",
"            # unity_discrete_type on self in __init__ instead.\n",
"            disAct = actions[:, 0 : env.unity_discrete_type].T\n",
"            conAct = actions[:, env.unity_discrete_type :]\n",
"        dis_log_prob = torch.stack(\n",
"            [ctgr.log_prob(act) for act, ctgr in zip(disAct, multi_categoricals)]\n",
"        )\n",
"        dis_entropy = torch.stack([ctgr.entropy() for ctgr in multi_categoricals])\n",
"        return (\n",
"            actions,\n",
"            dis_log_prob.sum(0),\n",
"            dis_entropy.sum(0),\n",
"            con_probs.log_prob(conAct).sum(1),\n",
"            con_probs.entropy().sum(1),\n",
"            self.critic(hidden),\n",
"        )\n",
"\n",
"\n",
"# torch.load of a fully pickled model resolves PPOAgent against __main__, so the\n",
"# class definition above must have run in this kernel first; otherwise it raises\n",
"# AttributeError: Can't get attribute 'PPOAgent' (as recorded in this cell's output).\n",
"mymodel = torch.load(\"../PPO-Model/SmallArea-256-128-hybrid.pt\")\n",
"mymodel.eval()"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"import torch\n",
"import numpy as np\n",
"\n",
"# torch.cat along both axes of a (2, 3) GPU tensor.\n",
"x = torch.randn(2, 3).to(\"cuda\")\n",
"print(x)\n",
"print(torch.cat((x, x, x), 0))\n",
"print(torch.cat((x, x, x), 1))\n",
"\n",
"aa = torch.empty(0).to(\"cuda\")\n",
"torch.cat([aa, x])\n",
"# CAUTION: [[]] * 2 repeats the SAME inner list object (aliasing pitfall);\n",
"# harmless here because only the outer list is appended to.\n",
"bb = [[]] * 2\n",
"print(bb)\n",
"bb.append(x.to(\"cpu\").tolist())\n",
"bb.append(x.to(\"cpu\").tolist())\n",
"print(bb)"
]
},
{
"cell_type": "code",
"execution_count": 64,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"tensor([[-1.1090, 0.4686, 0.6883],\n",
" [-0.1862, -0.3943, -0.0202],\n",
" [ 0.1436, -0.9444, -1.2079],\n",
" [-2.9434, -2.5989, -0.6653],\n",
" [ 0.4668, 0.8548, -0.4641],\n",
" [-0.3956, -0.2832, -0.1889],\n",
" [-0.2801, -0.2092, 1.7254],\n",
" [ 2.7938, -0.7742, 0.7053]], device='cuda:0')\n",
"(8, 0)\n",
"---\n",
"[[array([-1.1090169, 0.4685607, 0.6883437], dtype=float32)], [array([-0.1861974 , -0.39429024, -0.02016036], dtype=float32)], [array([ 0.14360362, -0.9443668 , -1.2079065 ], dtype=float32)], [array([-2.9433894 , -2.598913 , -0.66532046], dtype=float32)], [array([ 0.46684313, 0.8547877 , -0.46408093], dtype=float32)], [array([-0.39563984, -0.2831819 , -0.18891 ], dtype=float32)], [array([-0.28008553, -0.20918302, 1.7253567 ], dtype=float32)], [array([ 2.7938051, -0.7742478, 0.705279 ], dtype=float32)]]\n",
"[[array([-1.1090169, 0.4685607, 0.6883437], dtype=float32)], [], [array([ 0.14360362, -0.9443668 , -1.2079065 ], dtype=float32)], [array([-2.9433894 , -2.598913 , -0.66532046], dtype=float32)], [array([ 0.46684313, 0.8547877 , -0.46408093], dtype=float32)], [array([-0.39563984, -0.2831819 , -0.18891 ], dtype=float32)], [array([-0.28008553, -0.20918302, 1.7253567 ], dtype=float32)], [array([ 2.7938051, -0.7742478, 0.705279 ], dtype=float32)]]\n",
"---\n",
"[array([-1.1090169, 0.4685607, 0.6883437], dtype=float32), array([-1.1090169, 0.4685607, 0.6883437], dtype=float32)]\n",
"vvv tensor([[-1.1090, 0.4686, 0.6883],\n",
" [-1.1090, 0.4686, 0.6883]], device='cuda:0')\n",
"tensor([[-1.1090, 0.4686, 0.6883],\n",
" [-1.1090, 0.4686, 0.6883]], device='cuda:0')\n"
]
},
{
"data": {
"text/plain": [
"True"
]
},
"execution_count": 64,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"import numpy as np\n",
"import torch\n",
"\n",
"# Per-agent observation buffers: append, clear one, rebuild a GPU tensor.\n",
"agent_num = 8\n",
"ob_buffer = [[] for _ in range(agent_num)]\n",
"obs = torch.randn(8, 3).to(\"cuda\")\n",
"print(obs)\n",
"print(np.shape(np.array(ob_buffer)))\n",
"print('---')\n",
"obs_cpu = obs.to(\"cpu\").numpy()\n",
"for i in range(agent_num):\n",
"    ob_buffer[i].append(obs_cpu[i])\n",
"print(ob_buffer)\n",
"ob_buffer[1] = []\n",
"print(ob_buffer)\n",
"print('---')\n",
"for i in range(agent_num):\n",
"    ob_buffer[i].append(obs_cpu[i])\n",
"print(ob_buffer[0])\n",
"vvv = torch.tensor(ob_buffer[0]).to(\"cuda\")\n",
"print(\"vvv\", vvv)\n",
"empt = torch.tensor([]).to(\"cuda\")\n",
"vvvv = torch.cat((empt, vvv), 0)\n",
"print(vvvv)\n",
"vvvv.size()[0] > 0"
]
},
{
"cell_type": "code",
"execution_count": 6,
"metadata": {},
"outputs": [],
"source": [
"from AimbotEnv import Aimbot\n",
"from enum import Enum\n",
"import uuid\n",
"from mlagents_envs.side_channel.side_channel import (\n",
"    SideChannel,\n",
"    IncomingMessage,\n",
"    OutgoingMessage,\n",
")\n",
"from typing import List\n",
"\n",
"\n",
"class Targets(Enum):\n",
"    Free = 0\n",
"    Go = 1\n",
"    Attack = 2\n",
"    Num = 3\n",
"\n",
"\n",
"TotalRounds = {\"Go\": 0, \"Attack\": 0, \"Free\": 0}\n",
"WinRounds = {\"Go\": 0, \"Attack\": 0, \"Free\": 0}\n",
"\n",
"\n",
"class AimbotSideChannel(SideChannel):\n",
"    \"\"\"Side channel that receives round results from Unity and sends typed values back.\"\"\"\n",
"\n",
"    def __init__(self, channel_id: uuid.UUID) -> None:\n",
"        super().__init__(channel_id)\n",
"\n",
"    def on_message_received(self, msg: IncomingMessage) -> None:\n",
"        \"\"\"Handle a '|'-delimited message from Unity: round result or error report.\"\"\"\n",
"        thisMessage = msg.read_string()\n",
"        thisResult = thisMessage.split(\"|\")\n",
"        if thisResult[0] == \"result\":\n",
"            # thisResult[1] is the target name, thisResult[2] is \"Win\" or not.\n",
"            TotalRounds[thisResult[1]] += 1\n",
"            if thisResult[2] == \"Win\":\n",
"                WinRounds[thisResult[1]] += 1\n",
"        elif thisResult[0] == \"Error\":\n",
"            print(thisMessage)\n",
"\n",
"    # --- senders: queue a single typed payload for the C# side ---\n",
"    def send_string(self, data: str) -> None:\n",
"        \"\"\"Send one string to the C# side.\"\"\"\n",
"        msg = OutgoingMessage()\n",
"        msg.write_string(data)\n",
"        super().queue_message_to_send(msg)\n",
"\n",
"    def send_bool(self, data: bool) -> None:\n",
"        msg = OutgoingMessage()\n",
"        msg.write_bool(data)\n",
"        super().queue_message_to_send(msg)\n",
"\n",
"    def send_int(self, data: int) -> None:\n",
"        msg = OutgoingMessage()\n",
"        msg.write_int32(data)\n",
"        super().queue_message_to_send(msg)\n",
"\n",
"    def send_float(self, data: float) -> None:\n",
"        msg = OutgoingMessage()\n",
"        msg.write_float32(data)\n",
"        super().queue_message_to_send(msg)\n",
"\n",
"    def send_float_list(self, data: List[float]) -> None:\n",
"        msg = OutgoingMessage()\n",
"        msg.write_float32_list(data)\n",
"        super().queue_message_to_send(msg)\n",
"\n",
"\n",
"SIDE_CHANNEL_UUID = uuid.UUID(\"8bbfb62a-99b4-457c-879d-b78b69066b5e\")\n",
"ENV_PATH = \"../Build/Build-ParallelEnv-Target-OffPolicy-SingleStack-SideChannel-EndReward/Aimbot-ParallelEnv\"\n",
"aimBotsideChannel = AimbotSideChannel(SIDE_CHANNEL_UUID)\n",
"env = Aimbot(envPath=ENV_PATH, workerID=123, basePort=999, side_channels=[aimBotsideChannel])"
]
},
{
"cell_type": "code",
"execution_count": 7,
"metadata": {},
"outputs": [],
"source": [
"import numpy as np\n",
"import torch\n",
"import torch.nn as nn\n",
"import torch.optim as optim\n",
"from torch.distributions.normal import Normal\n",
"from torch.distributions.categorical import Categorical\n",
"\n",
"# FIX: dropped the leftover `and True` debug toggle from the device selection.\n",
"device = torch.device(\"cuda\" if torch.cuda.is_available() else \"cpu\")\n",
"\n",
"\n",
"def layer_init(layer, std=np.sqrt(2), bias_const=0.0):\n",
"    \"\"\"Orthogonal weight init + constant bias (CleanRL PPO recipe).\"\"\"\n",
"    torch.nn.init.orthogonal_(layer.weight, std)\n",
"    torch.nn.init.constant_(layer.bias, bias_const)\n",
"    return layer\n",
"\n",
"\n",
"class PPOAgent(nn.Module):\n",
"    \"\"\"PPO actor-critic with one hybrid (discrete + continuous) head per target type.\"\"\"\n",
"\n",
"    def __init__(self, env: Aimbot, targetNum: int):\n",
"        super(PPOAgent, self).__init__()\n",
"        self.targetNum = targetNum\n",
"        self.discrete_size = env.unity_discrete_size\n",
"        self.discrete_shape = list(env.unity_discrete_branches)\n",
"        # FIX: store the discrete branch count here; get_actions_value previously\n",
"        # read it from the notebook-global `env`, breaking the class's independence.\n",
"        self.discrete_type = env.unity_discrete_type\n",
"        self.continuous_size = env.unity_continuous_size\n",
"\n",
"        self.network = nn.Sequential(\n",
"            layer_init(nn.Linear(np.array(env.unity_observation_shape).prod(), 500)),\n",
"            nn.ReLU(),\n",
"            layer_init(nn.Linear(500, 300)),\n",
"            nn.ReLU(),\n",
"        )\n",
"        # One head (discrete logits / continuous mean / log-std) per target type.\n",
"        self.actor_dis = nn.ModuleList([layer_init(nn.Linear(300, self.discrete_size), std=0.01) for i in range(targetNum)])\n",
"        self.actor_mean = nn.ModuleList([layer_init(nn.Linear(300, self.continuous_size), std=0.01) for i in range(targetNum)])\n",
"        self.actor_logstd = nn.ParameterList([nn.Parameter(torch.zeros(1, self.continuous_size)) for i in range(targetNum)])\n",
"        self.critic = layer_init(nn.Linear(300, 1), std=1)\n",
"\n",
"    def get_value(self, state: torch.Tensor):\n",
"        return self.critic(self.network(state))\n",
"\n",
"    def get_actions_value(self, state: torch.Tensor, actions=None):\n",
"        hidden = self.network(state)\n",
"        # The first targetNum observation entries select each agent's target type,\n",
"        # which picks the output head used for that agent.\n",
"        targets = torch.argmax(state[:, 0 : self.targetNum], dim=1)\n",
"\n",
"        # discrete: per-agent head selection, then split logits per action branch\n",
"        dis_logits = torch.stack([self.actor_dis[targets[i]](hidden[i]) for i in range(targets.size()[0])])\n",
"        split_logits = torch.split(dis_logits, self.discrete_shape, dim=1)\n",
"        multi_categoricals = [Categorical(logits=thisLogits) for thisLogits in split_logits]\n",
"        # continuous: per-agent mean head plus its own learned log-std\n",
"        actions_mean = torch.stack([self.actor_mean[targets[i]](hidden[i]) for i in range(targets.size()[0])])\n",
"        action_std = torch.squeeze(torch.stack([torch.exp(self.actor_logstd[targets[i]]) for i in range(targets.size()[0])]), dim=-1)\n",
"        con_probs = Normal(actions_mean, action_std)\n",
"\n",
"        if actions is None:\n",
"            # FIX: removed the dead `if True: ... else: argmax` leftover toggle;\n",
"            # actions are sampled from the probability distributions.\n",
"            disAct = torch.stack([ctgr.sample() for ctgr in multi_categoricals])\n",
"            conAct = con_probs.sample()\n",
"            actions = torch.cat([disAct.T, conAct], dim=1)\n",
"        else:\n",
"            # Split the given joint action back into discrete / continuous parts.\n",
"            disAct = actions[:, 0 : self.discrete_type].T\n",
"            conAct = actions[:, self.discrete_type :]\n",
"        dis_log_prob = torch.stack(\n",
"            [ctgr.log_prob(act) for act, ctgr in zip(disAct, multi_categoricals)]\n",
"        )\n",
"        dis_entropy = torch.stack([ctgr.entropy() for ctgr in multi_categoricals])\n",
"        return (\n",
"            actions,\n",
"            dis_log_prob.sum(0),\n",
"            dis_entropy.sum(0),\n",
"            con_probs.log_prob(conAct).sum(1),\n",
"            con_probs.entropy().sum(1),\n",
"            self.critic(hidden),\n",
"        )\n",
"\n",
"\n",
"agent = PPOAgent(env, 4).to(device)"
]
},
{
"cell_type": "code",
"execution_count": 7,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"array([[ 1. , -10.343613 , 0. , -7.367299 ,\n",
" 0. , 0. , 30. , -10.343662 ,\n",
" 1. , -33.708736 , 1. , 1. ,\n",
" 1. , 1. , 2. , 1. ,\n",
" 1. , 1. , 2. , 2. ,\n",
" 2. , 1. , 1. , 1. ,\n",
" 33.270493 , 39.50663 , 49.146526 , 32.595673 ,\n",
" 30.21616 , 21.163797 , 46.9299 , 1.3264331 ,\n",
" 1.2435672 , 1.2541904 , 30.08522 , 30.041445 ,\n",
" 21.072094 , 0. ],\n",
" [ 0. , 0. , 0. , 0. ,\n",
" 0. , 0. , 30. , -5.5892515 ,\n",
" 1. , -29.907726 , 1. , 1. ,\n",
" 1. , 1. , 2. , 1. ,\n",
" 1. , 1. , 1. , 1. ,\n",
" 1. , 1. , 1. , 1. ,\n",
" 41.408752 , 47.830173 , 45.03225 , 31.905174 ,\n",
" 41.849663 , 41.849648 , 43.001434 , 45.0322 ,\n",
" 47.48242 , 40.00285 , 41.668346 , 41.607723 ,\n",
" 41.668335 , 0. ],\n",
" [ 1. , 2.9582403 , 0. , -4.699738 ,\n",
" 0. , 0. , 30. , -5.412487 ,\n",
" 1. , -32.79967 , 1. , 2. ,\n",
" 1. , 1. , 1. , 1. ,\n",
" 1. , 1. , 1. , 2. ,\n",
" 1. , 1. , 1. , 1. ,\n",
" 20.17488 , 49.507687 , 48.162056 , 45.98998 ,\n",
" 44.75835 , 31.08564 , 32.865173 , 24.676666 ,\n",
" 12.952409 , 39.69923 , 44.564423 , 44.49966 ,\n",
" 44.564495 , 0. ],\n",
" [ 2. , -0.20171738, 0. , -10.340863 ,\n",
" 0. , 0. , 30. , -22.987915 ,\n",
" 1. , -34.37514 , 1. , 1. ,\n",
" 1. , 1. , 1. , 1. ,\n",
" 1. , 2. , 1. , 1. ,\n",
" 1. , 1. , 1. , 1. ,\n",
" 11.631058 , 13.872022 , 18.006863 , 27.457632 ,\n",
" 46.343067 , 46.343094 , 20.155125 , 49.867714 ,\n",
" 52.965984 , 56.775608 , 46.14223 , 46.075138 ,\n",
" 46.142246 , 0. ],\n",
" [ 2. , -14.687862 , 0. , -12.615574 ,\n",
" 0. , 0. , 30. , 15.125373 ,\n",
" 1. , -30.849268 , 1. , 1. ,\n",
" 1. , 1. , 1. , 1. ,\n",
" 1. , 1. , 1. , 1. ,\n",
" 1. , 1. , 1. , 2. ,\n",
" 52.430542 , 48.912865 , 46.05145 , 43.974594 ,\n",
" 42.796673 , 26.467875 , 11.072432 , 7.190229 ,\n",
" 5.483198 , 4.5500183 , 42.611244 , 42.549267 ,\n",
" 18.856438 , 0. ],\n",
" [ 0. , 0. , 0. , 0. ,\n",
" 0. , 0. , 30. , -4.0314903 ,\n",
" 1. , -29.164669 , 1. , 1. ,\n",
" 1. , 1. , 1. , 1. ,\n",
" 1. , 1. , 1. , 1. ,\n",
" 1. , 1. , 1. , 1. ,\n",
" 44.074184 , 46.9762 , 44.228096 , 42.2335 ,\n",
" 41.102253 , 41.102367 , 42.233757 , 44.22849 ,\n",
" 44.321827 , 37.335304 , 40.924183 , 40.86467 ,\n",
" 40.924236 , 0. ],\n",
" [ 0. , 0. , 0. , 0. ,\n",
" 0. , 0. , 30. , -18.603981 ,\n",
" 1. , -29.797592 , 1. , 1. ,\n",
" 1. , 1. , 1. , 1. ,\n",
" 1. , 1. , 1. , 1. ,\n",
" 1. , 2. , 2. , 2. ,\n",
" 19.134174 , 22.76088 , 29.468704 , 42.88739 ,\n",
" 41.738823 , 41.739002 , 42.88781 , 44.913647 ,\n",
" 47.704174 , 51.135338 , 20.418388 , 12.470214 ,\n",
" 12.670923 , 0. ],\n",
" [ 0. , 0. , 0. , 0. ,\n",
" 0. , 0. , 30. , -19.07032 ,\n",
" 1. , -30.246218 , 1. , 1. ,\n",
" 1. , 1. , 1. , 1. ,\n",
" 1. , 1. , 1. , 1. ,\n",
" 1. , 1. , 1. , 1. ,\n",
" 18.336487 , 21.81617 , 28.251017 , 42.977867 ,\n",
" 42.18994 , 42.19034 , 43.351707 , 45.399582 ,\n",
" 48.22037 , 51.68873 , 42.00719 , 41.94621 ,\n",
" 42.00739 , 0. ]], dtype=float32)"
]
},
"execution_count": 7,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Fetch the current per-agent state batch; reward and done flags are discarded here.\n",
"state, _, _ = env.getSteps()\n",
"state"
]
},
{
"cell_type": "code",
"execution_count": 8,
"metadata": {},
"outputs": [],
"source": [
"env.close()"
]
},
{
"cell_type": "code",
"execution_count": 16,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"float"
]
},
"execution_count": 16,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"import torch\n",
"\n",
"# .item() on the 0-dim mean of a CUDA tensor yields a plain Python float.\n",
"scores = torch.Tensor([[1, 2, 3], [2, 2, 3], [3, 2, 3], [4, 2, 3]]).to(\"cuda\")\n",
"type(torch.mean(scores).item())"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3.9.7 64-bit",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.9.7"
},
"orig_nbformat": 4,
"vscode": {
"interpreter": {
"hash": "86e2db13b09bd6be22cb599ea60c1572b9ef36ebeaa27a4c8e961d6df315ac32"
}
}
},
"nbformat": 4,
"nbformat_minor": 2
}