From 10a16632302f143cfbe6265de21394facb7d3525 Mon Sep 17 00:00:00 2001
From: Koha9 <UCUNICORN@Hotmail.com>
Date: Tue, 8 Aug 2023 20:49:23 +0900
Subject: [PATCH] =?UTF-8?q?=E5=B0=86Tensor=E6=94=B9=E4=B8=BAtensor?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

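Tensor与tensor的问题，规范化tensor使用。

Replace the legacy torch.Tensor(...) constructor calls with
torch.tensor(..., dtype=torch.float32) when building tensors from Python
or NumPy data (MultiNN-PPO.py, aimemory.py), and use the class name
torch.Tensor instead of the factory function torch.tensor in the type
annotations in ppoagent.py.

Note: this patch also flips TRAIN from False to True in arguments.py and
strips trailing whitespace on two blank lines in aimemory.py.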
---
 Aimbot-PPO-Python/Pytorch/MultiNN-PPO.py |  2 +-
 Aimbot-PPO-Python/Pytorch/aimemory.py    | 10 +++++-----
 Aimbot-PPO-Python/Pytorch/arguments.py   |  2 +-
 Aimbot-PPO-Python/Pytorch/ppoagent.py    |  4 ++--
 4 files changed, 9 insertions(+), 9 deletions(-)

diff --git a/Aimbot-PPO-Python/Pytorch/MultiNN-PPO.py b/Aimbot-PPO-Python/Pytorch/MultiNN-PPO.py
index 4664a7b..d01872b 100644
--- a/Aimbot-PPO-Python/Pytorch/MultiNN-PPO.py
+++ b/Aimbot-PPO-Python/Pytorch/MultiNN-PPO.py
@@ -112,7 +112,7 @@ if __name__ == "__main__":
                 with torch.no_grad():
                     # predict actions
                     action, dis_logprob, _, con_logprob, _, value = agent.get_actions_value(
-                        torch.Tensor(state).to(device)
+                        torch.tensor(state,dtype=torch.float32).to(device)
                     )
                     value = value.flatten()
 
diff --git a/Aimbot-PPO-Python/Pytorch/aimemory.py b/Aimbot-PPO-Python/Pytorch/aimemory.py
index 8aa3ee3..4ef2dbb 100644
--- a/Aimbot-PPO-Python/Pytorch/aimemory.py
+++ b/Aimbot-PPO-Python/Pytorch/aimemory.py
@@ -61,7 +61,7 @@ class PPOMem:
             thisRewardBF = (np.asarray(thisRewardBF) + (remainTime * self.result_broadcast_ratio)).tolist()
         else:
             print("!!!!!DIDNT GET RESULT REWARD!!!!!!", rewardBF[-1])
-        return torch.Tensor(thisRewardBF).to(self.device)
+        return torch.tensor(thisRewardBF,dtype=torch.float32).to(self.device)
 
     def save_memories(
         self,
@@ -101,10 +101,10 @@ class PPOMem:
                 thisRewardsTensor = self.broad_cast_end_reward(self.rewards_bf[i], remainTime)
                 adv, rt = agent.gae(
                     rewards=thisRewardsTensor,
-                    dones=torch.Tensor(self.dones_bf[i]).to(self.device),
+                    dones=torch.tensor(self.dones_bf[i],dtype=torch.float32).to(self.device),
                     values=torch.tensor(self.values_bf[i]).to(self.device),
                     next_obs=torch.tensor(next_state[i]).to(self.device).unsqueeze(0),
-                    next_done=torch.Tensor([next_done[i]]).to(self.device),
+                    next_done=torch.tensor([next_done[i]],dtype=torch.float32).to(self.device),
                 )
                 # send memories to training datasets
                 self.obs[roundTargetType] = torch.cat((self.obs[roundTargetType], torch.tensor(np.array(self.ob_bf[i])).to(self.device)), 0)
@@ -119,7 +119,7 @@ class PPOMem:
                 # clear buffers
                 self.clear_buffers(i)
                 print(f"train dataset {Targets(roundTargetType).name} added:{self.obs[roundTargetType].size()[0]}/{self.data_set_size}")
-    
+
     def clear_buffers(self,ind:int):
         # clear buffers
         self.ob_bf[ind] = []
@@ -129,7 +129,7 @@ class PPOMem:
         self.rewards_bf[ind] = []
         self.dones_bf[ind] = []
         self.values_bf[ind] = []
-    
+
     def clear_training_datasets(self,ind:int):
         # clear training datasets
         self.obs[ind] = torch.tensor([]).to(self.device)
diff --git a/Aimbot-PPO-Python/Pytorch/arguments.py b/Aimbot-PPO-Python/Pytorch/arguments.py
index 1352b82..f07acca 100644
--- a/Aimbot-PPO-Python/Pytorch/arguments.py
+++ b/Aimbot-PPO-Python/Pytorch/arguments.py
@@ -34,7 +34,7 @@ BROADCASTREWARD = False
 ANNEAL_LEARNING_RATE = True
 CLIP_VLOSS = True
 NORM_ADV = False
-TRAIN = False
+TRAIN = True
 SAVE_MODEL = False
 WANDB_TACK = False
 LOAD_DIR = None
diff --git a/Aimbot-PPO-Python/Pytorch/ppoagent.py b/Aimbot-PPO-Python/Pytorch/ppoagent.py
index 50ab701..53df6c0 100644
--- a/Aimbot-PPO-Python/Pytorch/ppoagent.py
+++ b/Aimbot-PPO-Python/Pytorch/ppoagent.py
@@ -275,8 +275,8 @@ class PPOAgent(nn.Module):
             self,
             rewards: torch.Tensor,
             dones: torch.Tensor,
-            values: torch.tensor,
-            next_obs: torch.tensor,
+            values: torch.Tensor,
+            next_obs: torch.Tensor,
             next_done: torch.Tensor,
     ) -> tuple:
         # GAE