From 15c1edb6c9b347992cb4cd9a96e02b2a10817f8c Mon Sep 17 00:00:00 2001
From: Koha9
Date: Sat, 29 Jul 2023 23:52:20 +0900
Subject: [PATCH] Match the V3.1.6 Play mode
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Modify the code to match the Stay Target behavior from V3.1.5.
---
 Aimbot-PPO-Python/Pytorch/AimbotEnv.py        |  3 +
 Aimbot-PPO-Python/Pytorch/Archive/test2.ipynb | 25 +++--
 Aimbot-PPO-Python/Pytorch/MultiNN-PPO.py      | 98 ++++++++++---------
 Aimbot-PPO-Python/Pytorch/arguments.py        |  6 +-
 4 files changed, 71 insertions(+), 61 deletions(-)

diff --git a/Aimbot-PPO-Python/Pytorch/AimbotEnv.py b/Aimbot-PPO-Python/Pytorch/AimbotEnv.py
index 6f43799..6f97cca 100644
--- a/Aimbot-PPO-Python/Pytorch/AimbotEnv.py
+++ b/Aimbot-PPO-Python/Pytorch/AimbotEnv.py
@@ -68,6 +68,9 @@ class Aimbot(gym.Env):
         # agents number
         self.unity_agent_num = len(self.unity_agent_IDS)
 
+        # all zero action
+        self.all_zero_action = np.zeros((self.unity_agent_num, self.unity_action_size))
+
     def reset(self) -> Tuple[np.ndarray, List, List]:
         """reset environment and get observations
 
diff --git a/Aimbot-PPO-Python/Pytorch/Archive/test2.ipynb b/Aimbot-PPO-Python/Pytorch/Archive/test2.ipynb
index 1d09dd1..f4a4a60 100644
--- a/Aimbot-PPO-Python/Pytorch/Archive/test2.ipynb
+++ b/Aimbot-PPO-Python/Pytorch/Archive/test2.ipynb
@@ -181,31 +181,30 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 1,
+   "execution_count": 6,
    "metadata": {},
    "outputs": [
-    {
-     "name": "stderr",
-     "output_type": "stream",
-     "text": [
-      "Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.\n",
-      "\u001b[34m\u001b[1mwandb\u001b[0m: Currently logged in as: \u001b[33mkoha9\u001b[0m. Use \u001b[1m`wandb login --relogin`\u001b[0m to force relogin\n"
-     ]
-    },
     {
      "data": {
       "text/plain": [
-       "True"
+       "array([[0., 0., 0., 0.],\n",
+       "       [0., 0., 0., 0.],\n",
+       "       [0., 0., 0., 0.],\n",
+       "       [0., 0., 0., 0.],\n",
+       "       [0., 0., 0., 0.],\n",
+       "       [0., 0., 0., 0.],\n",
+       "       [0., 0., 0., 0.],\n",
+       "       [0., 0., 0., 0.]])"
       ]
      },
-     "execution_count": 1,
+     "execution_count": 6,
      "metadata": {},
      "output_type": "execute_result"
     }
    ],
    "source": [
-    "import wandb\n",
-    "wandb.login()"
+    "import numpy as np\n",
+    "np.zeros((8, 4))"
    ]
   }
  ],
diff --git a/Aimbot-PPO-Python/Pytorch/MultiNN-PPO.py b/Aimbot-PPO-Python/Pytorch/MultiNN-PPO.py
index db157d7..d9f8c5e 100644
--- a/Aimbot-PPO-Python/Pytorch/MultiNN-PPO.py
+++ b/Aimbot-PPO-Python/Pytorch/MultiNN-PPO.py
@@ -34,10 +34,11 @@ if __name__ == "__main__":
 
     # Initialize environment agent optimizer
     aimbot_side_channel = AimbotSideChannel(SIDE_CHANNEL_UUID)
-    env = Aimbot(env_path=args.path,
-                 worker_id=args.workerID,
-                 base_port=args.baseport,
-                 side_channels=[aimbot_side_channel])
+    env = Aimbot(
+        env_path=args.path,
+        worker_id=args.workerID,
+        base_port=args.baseport,
+        side_channels=[aimbot_side_channel])
     if args.load_dir is None:
         agent = PPOAgent(
             env=env,
@@ -104,7 +105,12 @@ if __name__ == "__main__":
     last_reward = [0. for i in range(env.unity_agent_num)]
     # MAIN LOOP: run agent in environment
     while True:
-        # On decision point, choose action by agent
+        # if Target Type (state[0][0]) is stay (4), use the all-zero action
+        if state[0][0] == 4:
+            next_state, reward, next_done = env.step(env.all_zero_action)
+            state, done = next_state, next_done
+            continue
+        # on a decision point where Target Type (state[0][0]) is not stay (4), choose the action with the agent
         if step % args.decision_period == 0:
             step += 1
             # Choose action by agent
@@ -124,52 +130,54 @@
             next_state, reward, next_done = env.step(action_cpu)
             # save memories
-            ppo_memories.save_memories(
-                now_step=step,
-                agent=agent,
-                state=state,
-                action_cpu=action_cpu,
-                dis_logprob_cpu=dis_logprob_cpu,
-                con_logprob_cpu=con_logprob_cpu,
-                reward=reward,
-                done=done,
-                value_cpu=value_cpu,
-                last_reward=last_reward,
-                next_done=next_done,
-                next_state=next_state,
-            )
-            # check if any training dataset is full and ready to train
-            for i in range(args.target_num):
-                if ppo_memories.obs[i].size()[0] >= args.datasetSize:
-                    # start train NN
-                    train_queue.append(i)
-            if len(train_queue) > 0:
-                # break while loop and start train
-                break
-            # update state
+            if args.train:
+                ppo_memories.save_memories(
+                    now_step=step,
+                    agent=agent,
+                    state=state,
+                    action_cpu=action_cpu,
+                    dis_logprob_cpu=dis_logprob_cpu,
+                    con_logprob_cpu=con_logprob_cpu,
+                    reward=reward,
+                    done=done,
+                    value_cpu=value_cpu,
+                    last_reward=last_reward,
+                    next_done=next_done,
+                    next_state=next_state,
+                )
+                # check if any training dataset is full and ready to train
+                for i in range(args.target_num):
+                    if ppo_memories.obs[i].size()[0] >= args.datasetSize:
+                        # start train NN
+                        train_queue.append(i)
+                if len(train_queue) > 0:
+                    # break while loop and start train
+                    break
+            # update state
             state, done = next_state, next_done
         else:
             step += 1
             # skip this step use last predict action
             next_state, reward, next_done = env.step(action_cpu)
             # save memories
-            ppo_memories.save_memories(
-                now_step=step,
-                agent=agent,
-                state=state,
-                action_cpu=action_cpu,
-                dis_logprob_cpu=dis_logprob_cpu,
-                con_logprob_cpu=con_logprob_cpu,
-                reward=reward,
-                done=done,
-                value_cpu=value_cpu,
-                last_reward=last_reward,
-                next_done=next_done,
-                next_state=next_state,
-            )
-            # update state
-            state = next_state
-            last_reward = reward
+            if args.train:
+                ppo_memories.save_memories(
+                    now_step=step,
+                    agent=agent,
+                    state=state,
+                    action_cpu=action_cpu,
+                    dis_logprob_cpu=dis_logprob_cpu,
+                    con_logprob_cpu=con_logprob_cpu,
+                    reward=reward,
+                    done=done,
+                    value_cpu=value_cpu,
+                    last_reward=last_reward,
+                    next_done=next_done,
+                    next_state=next_state,
+                )
+            # update state
+            state = next_state
+            last_reward = reward
 
     if args.train:
         # train mode on
diff --git a/Aimbot-PPO-Python/Pytorch/arguments.py b/Aimbot-PPO-Python/Pytorch/arguments.py
index 65d8540..1352b82 100644
--- a/Aimbot-PPO-Python/Pytorch/arguments.py
+++ b/Aimbot-PPO-Python/Pytorch/arguments.py
@@ -34,9 +34,9 @@
 BROADCASTREWARD = False
 ANNEAL_LEARNING_RATE = True
 CLIP_VLOSS = True
 NORM_ADV = False
-TRAIN = True
-SAVE_MODEL = True
-WANDB_TACK = True
+TRAIN = False
+SAVE_MODEL = False
+WANDB_TACK = False
 LOAD_DIR = None
 #LOAD_DIR = "../PPO-Model/PList_Go_LeakyReLU_9331_1677965178_bestGoto/PList_Go_LeakyReLU_9331_1677965178_10.709002.pt"
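
Note on the new Play-mode branch above: whenever the first observation element (Target Type, state[0][0]) equals 4, i.e. a Stay Target, the main loop no longer queries the agent at all; it steps the environment with env.all_zero_action and continues. The minimal Python sketch below illustrates that control flow in isolation. FakeEnv, AGENT_NUM, ACTION_SIZE and STAY_TARGET are illustrative stand-ins and not part of this patch.

    import numpy as np

    AGENT_NUM, ACTION_SIZE = 8, 4   # assumed shapes, matching the np.zeros((8, 4)) notebook cell above
    STAY_TARGET = 4                 # Target Type value treated as "stay"

    class FakeEnv:
        """Illustrative stand-in for Aimbot; step() returns (state, reward, done)."""
        def __init__(self):
            self.all_zero_action = np.zeros((AGENT_NUM, ACTION_SIZE))

        def step(self, action):
            state = np.zeros((AGENT_NUM, 10))
            state[0][0] = np.random.choice([0, STAY_TARGET])  # sometimes a stay target appears
            return state, np.zeros(AGENT_NUM), np.zeros(AGENT_NUM, dtype=bool)

    env = FakeEnv()
    state, reward, done = env.step(env.all_zero_action)
    for _ in range(10):
        if state[0][0] == STAY_TARGET:
            # stay target: skip agent inference, feed the all-zero action and continue
            state, reward, done = env.step(env.all_zero_action)
            continue
        # non-stay target: an agent-chosen action would be used here instead
        agent_action = np.random.uniform(-1.0, 1.0, (AGENT_NUM, ACTION_SIZE))
        state, reward, done = env.step(agent_action)

Because the stay branch hits continue before save_memories, no transitions from Stay Targets enter the training buffers, which is consistent with TRAIN, SAVE_MODEL and WANDB_TACK defaulting to False for Play mode in arguments.py.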