From 15c1edb6c9b347992cb4cd9a96e02b2a10817f8c Mon Sep 17 00:00:00 2001
From: Koha9
Date: Sat, 29 Jul 2023 23:52:20 +0900
Subject: [PATCH] Match the V3.1.6 Play mode
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Modify the code to match the Stay Target behavior from V3.1.5.
---
 Aimbot-PPO-Python/Pytorch/AimbotEnv.py        |  3 +
 Aimbot-PPO-Python/Pytorch/Archive/test2.ipynb | 25 +++--
 Aimbot-PPO-Python/Pytorch/MultiNN-PPO.py      | 98 ++++++++++---------
 Aimbot-PPO-Python/Pytorch/arguments.py        |  6 +-
 4 files changed, 71 insertions(+), 61 deletions(-)

diff --git a/Aimbot-PPO-Python/Pytorch/AimbotEnv.py b/Aimbot-PPO-Python/Pytorch/AimbotEnv.py
index 6f43799..6f97cca 100644
--- a/Aimbot-PPO-Python/Pytorch/AimbotEnv.py
+++ b/Aimbot-PPO-Python/Pytorch/AimbotEnv.py
@@ -68,6 +68,9 @@ class Aimbot(gym.Env):
         # agents number
         self.unity_agent_num = len(self.unity_agent_IDS)
 
+        # all zero action
+        self.all_zero_action = np.zeros((self.unity_agent_num, self.unity_action_size))
+
     def reset(self) -> Tuple[np.ndarray, List, List]:
         """reset environment and get observations
 
diff --git a/Aimbot-PPO-Python/Pytorch/Archive/test2.ipynb b/Aimbot-PPO-Python/Pytorch/Archive/test2.ipynb
index 1d09dd1..f4a4a60 100644
--- a/Aimbot-PPO-Python/Pytorch/Archive/test2.ipynb
+++ b/Aimbot-PPO-Python/Pytorch/Archive/test2.ipynb
@@ -181,31 +181,30 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 1,
+   "execution_count": 6,
    "metadata": {},
    "outputs": [
-    {
-     "name": "stderr",
-     "output_type": "stream",
-     "text": [
-      "Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.\n",
-      "\u001b[34m\u001b[1mwandb\u001b[0m: Currently logged in as: \u001b[33mkoha9\u001b[0m. Use \u001b[1m`wandb login --relogin`\u001b[0m to force relogin\n"
-     ]
-    },
     {
      "data": {
       "text/plain": [
-       "True"
+       "array([[0., 0., 0., 0.],\n",
+       "       [0., 0., 0., 0.],\n",
+       "       [0., 0., 0., 0.],\n",
+       "       [0., 0., 0., 0.],\n",
+       "       [0., 0., 0., 0.],\n",
+       "       [0., 0., 0., 0.],\n",
+       "       [0., 0., 0., 0.],\n",
+       "       [0., 0., 0., 0.]])"
       ]
      },
-     "execution_count": 1,
+     "execution_count": 6,
      "metadata": {},
      "output_type": "execute_result"
     }
    ],
    "source": [
-    "import wandb\n",
-    "wandb.login()"
+    "import numpy as np\n",
+    "np.zeros((8, 4))"
    ]
   }
  ],
diff --git a/Aimbot-PPO-Python/Pytorch/MultiNN-PPO.py b/Aimbot-PPO-Python/Pytorch/MultiNN-PPO.py
index db157d7..d9f8c5e 100644
--- a/Aimbot-PPO-Python/Pytorch/MultiNN-PPO.py
+++ b/Aimbot-PPO-Python/Pytorch/MultiNN-PPO.py
@@ -34,10 +34,11 @@ if __name__ == "__main__":
 
     # Initialize environment agent optimizer
     aimbot_side_channel = AimbotSideChannel(SIDE_CHANNEL_UUID)
-    env = Aimbot(env_path=args.path,
-                 worker_id=args.workerID,
-                 base_port=args.baseport,
-                 side_channels=[aimbot_side_channel])
+    env = Aimbot(
+        env_path=args.path,
+        worker_id=args.workerID,
+        base_port=args.baseport,
+        side_channels=[aimbot_side_channel])
     if args.load_dir is None:
         agent = PPOAgent(
             env=env,
@@ -104,7 +105,12 @@ if __name__ == "__main__":
     last_reward = [0. for i in range(env.unity_agent_num)]
     # MAIN LOOP: run agent in environment
     while True:
-        # On decision point, choose action by agent
+        # if Target Type (state[0][0]) is stay (4), use the all-zero action
+        if state[0][0] == 4:
+            next_state, reward, next_done = env.step(env.all_zero_action)
+            state, done = next_state, next_done
+            continue
+        # on a decision point where Target Type (state[0][0]) is not stay (4), choose the action with the agent
         if step % args.decision_period == 0:
             step += 1
             # Choose action by agent
@@ -124,52 +130,54 @@
             next_state, reward, next_done = env.step(action_cpu)
             # save memories
-            ppo_memories.save_memories(
-                now_step=step,
-                agent=agent,
-                state=state,
-                action_cpu=action_cpu,
-                dis_logprob_cpu=dis_logprob_cpu,
-                con_logprob_cpu=con_logprob_cpu,
-                reward=reward,
-                done=done,
-                value_cpu=value_cpu,
-                last_reward=last_reward,
-                next_done=next_done,
-                next_state=next_state,
-            )
-            # check if any training dataset is full and ready to train
-            for i in range(args.target_num):
-                if ppo_memories.obs[i].size()[0] >= args.datasetSize:
-                    # start train NN
-                    train_queue.append(i)
-            if len(train_queue) > 0:
-                # break while loop and start train
-                break
-            # update state
+            if args.train:
+                ppo_memories.save_memories(
+                    now_step=step,
+                    agent=agent,
+                    state=state,
+                    action_cpu=action_cpu,
+                    dis_logprob_cpu=dis_logprob_cpu,
+                    con_logprob_cpu=con_logprob_cpu,
+                    reward=reward,
+                    done=done,
+                    value_cpu=value_cpu,
+                    last_reward=last_reward,
+                    next_done=next_done,
+                    next_state=next_state,
+                )
+                # check if any training dataset is full and ready to train
+                for i in range(args.target_num):
+                    if ppo_memories.obs[i].size()[0] >= args.datasetSize:
+                        # start train NN
+                        train_queue.append(i)
+                if len(train_queue) > 0:
+                    # break while loop and start train
+                    break
+            # update state
             state, done = next_state, next_done
         else:
             step += 1
             # skip this step use last predict action
             next_state, reward, next_done = env.step(action_cpu)
             # save memories
-            ppo_memories.save_memories(
-                now_step=step,
-                agent=agent,
-                state=state,
-                action_cpu=action_cpu,
-                dis_logprob_cpu=dis_logprob_cpu,
-                con_logprob_cpu=con_logprob_cpu,
-                reward=reward,
-                done=done,
-                value_cpu=value_cpu,
-                last_reward=last_reward,
-                next_done=next_done,
-                next_state=next_state,
-            )
-            # update state
-            state = next_state
-            last_reward = reward
+            if args.train:
+                ppo_memories.save_memories(
+                    now_step=step,
+                    agent=agent,
+                    state=state,
+                    action_cpu=action_cpu,
+                    dis_logprob_cpu=dis_logprob_cpu,
+                    con_logprob_cpu=con_logprob_cpu,
+                    reward=reward,
+                    done=done,
+                    value_cpu=value_cpu,
+                    last_reward=last_reward,
+                    next_done=next_done,
+                    next_state=next_state,
+                )
+            # update state
+            state = next_state
+            last_reward = reward
 
     if args.train:
         # train mode on
diff --git a/Aimbot-PPO-Python/Pytorch/arguments.py b/Aimbot-PPO-Python/Pytorch/arguments.py
index 65d8540..1352b82 100644
--- a/Aimbot-PPO-Python/Pytorch/arguments.py
+++ b/Aimbot-PPO-Python/Pytorch/arguments.py
@@ -34,9 +34,9 @@
 BROADCASTREWARD = False
 ANNEAL_LEARNING_RATE = True
 CLIP_VLOSS = True
 NORM_ADV = False
-TRAIN = True
-SAVE_MODEL = True
-WANDB_TACK = True
+TRAIN = False
+SAVE_MODEL = False
+WANDB_TACK = False
 LOAD_DIR = None
 #LOAD_DIR = "../PPO-Model/PList_Go_LeakyReLU_9331_1677965178_bestGoto/PList_Go_LeakyReLU_9331_1677965178_10.709002.pt"
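
Note on the new Play-mode branch above: whenever the first observation element (Target Type, state[0][0]) equals 4, i.e. a Stay Target, the main loop no longer queries the agent at all; it steps the environment with env.all_zero_action and continues. The minimal Python sketch below illustrates that control flow in isolation. FakeEnv, AGENT_NUM, ACTION_SIZE and STAY_TARGET are illustrative stand-ins and not part of this patch.

    import numpy as np

    AGENT_NUM, ACTION_SIZE = 8, 4   # assumed shapes, matching the np.zeros((8, 4)) notebook cell above
    STAY_TARGET = 4                 # Target Type value treated as "stay"

    class FakeEnv:
        """Illustrative stand-in for Aimbot; step() returns (state, reward, done)."""
        def __init__(self):
            self.all_zero_action = np.zeros((AGENT_NUM, ACTION_SIZE))

        def step(self, action):
            state = np.zeros((AGENT_NUM, 10))
            state[0][0] = np.random.choice([0, STAY_TARGET])  # sometimes a stay target appears
            return state, np.zeros(AGENT_NUM), np.zeros(AGENT_NUM, dtype=bool)

    env = FakeEnv()
    state, reward, done = env.step(env.all_zero_action)
    for _ in range(10):
        if state[0][0] == STAY_TARGET:
            # stay target: skip agent inference, feed the all-zero action and continue
            state, reward, done = env.step(env.all_zero_action)
            continue
        # non-stay target: an agent-chosen action would be used here instead
        agent_action = np.random.uniform(-1.0, 1.0, (AGENT_NUM, ACTION_SIZE))
        state, reward, done = env.step(agent_action)

Because the stay branch hits continue before save_memories, no transitions from Stay Targets enter the training buffers, which is consistent with TRAIN, SAVE_MODEL and WANDB_TACK defaulting to False for Play mode in arguments.py.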