Fix wrong remaining-time value

Fix the wrong remaining-time value (what a stupid mistake...) and remove the doubled WANDB writer call
2022-12-04 09:20:05 +09:00 · 2022-12-04 09:20:05 +09:00 · 1787872e82
commit 1787872e82
parent ad9817e7a4
1 changed file with 3 additions and 3 deletions
--- a/Aimbot-PPO-Python/Pytorch/MultiNN-PPO.py
+++ b/Aimbot-PPO-Python/Pytorch/MultiNN-PPO.py
@ -65,6 +65,7 @@ class Targets(Enum):
    Attack = 2
    Defence = 3
    Num = 4
+STATE_REMAINTIME_POSITION = 6
 BASE_WINREWARD = 999
 BASE_LOSEREWARD = -999
 TARGETNUM= 4
@ -417,7 +418,7 @@ if __name__ == "__main__":
                value_cpu = value.cpu().numpy()
                # Environment step
                next_state, reward, next_done = env.step(action_cpu)
-
+                remainTime = state[i,STATE_REMAINTIME_POSITION]
                # save memories
                for i in range(env.unity_agent_num):
                    # save memories to buffers
@ -433,7 +434,7 @@ if __name__ == "__main__":
                        # compute advantage and discounted reward
                        #print(i,"over")
                        roundTargetType = int(state[i,0])
-                        thisRewardsTensor = broadCastEndReward(rewards_bf[i],roundTargetType)
+                        thisRewardsTensor = broadCastEndReward(rewards_bf[i],remainTime)
                        adv, rt = GAE(
                            agent,
                            args,
@ -646,7 +647,6 @@ if __name__ == "__main__":

                # record rewards for plotting purposes
                writer.add_scalar(f"Target{targetName}/value_loss", v_loss.item(), target_steps[thisT])
-                writer.add_scalar(f"Target{targetName}/value_loss", v_loss.item(), target_steps[thisT])
                writer.add_scalar(f"Target{targetName}/dis_policy_loss", dis_pg_loss.item(), target_steps[thisT])
                writer.add_scalar(f"Target{targetName}/con_policy_loss", con_pg_loss.item(), target_steps[thisT])
                writer.add_scalar(f"Target{targetName}/total_loss", loss.item(), target_steps[thisT])