Side Channel Added

Add a side channel to save the target win ratio.
This commit is contained in:
Koha9 2022-11-30 07:01:05 +09:00
parent a0895c7449
commit 1e974ada2a
2 changed files with 67 additions and 3 deletions

View File

@ -12,12 +12,13 @@ class Aimbot(gym.Env):
envPath: str, envPath: str,
workerID: int = 1, workerID: int = 1,
basePort: int = 100, basePort: int = 100,
side_channels: list = []
): ):
super(Aimbot, self).__init__() super(Aimbot, self).__init__()
self.env = UnityEnvironment( self.env = UnityEnvironment(
file_name=envPath, file_name=envPath,
seed=1, seed=1,
side_channels=[], side_channels=side_channels,
worker_id=workerID, worker_id=workerID,
base_port=basePort, base_port=basePort,
) )

View File

@ -3,6 +3,7 @@ import wandb
import time import time
import numpy as np import numpy as np
import random import random
import uuid
import torch import torch
import torch.nn as nn import torch.nn as nn
import torch.optim as optim import torch.optim as optim
@ -12,15 +13,24 @@ from torch.distributions.normal import Normal
from torch.distributions.categorical import Categorical from torch.distributions.categorical import Categorical
from distutils.util import strtobool from distutils.util import strtobool
from torch.utils.tensorboard import SummaryWriter from torch.utils.tensorboard import SummaryWriter
from mlagents_envs.environment import UnityEnvironment
from mlagents_envs.side_channel.side_channel import (
SideChannel,
IncomingMessage,
OutgoingMessage,
)
from typing import List
bestReward = 0 bestReward = 0
DEFAULT_SEED = 9331 DEFAULT_SEED = 9331
ENV_PATH = "../Build/Build-ParallelEnv-BigArea-6Enemy/Aimbot-ParallelEnv" ENV_PATH = "../Build/Build-ParallelEnv-Target-OffPolicy-SingleStack-SideChannel/Aimbot-ParallelEnv"
SIDE_CHANNEL_UUID = uuid.UUID("8bbfb62a-99b4-457c-879d-b78b69066b5e")
WAND_ENTITY = "koha9" WAND_ENTITY = "koha9"
WORKER_ID = 1 WORKER_ID = 1
BASE_PORT = 1000 BASE_PORT = 1000
# !!! Check every parameter before running !!!
TOTAL_STEPS = 2000000 TOTAL_STEPS = 2000000
STEP_NUM = 314 STEP_NUM = 314
@ -44,6 +54,10 @@ WANDB_TACK = False
LOAD_DIR = None LOAD_DIR = None
# LOAD_DIR = "../PPO-Model/SmallArea-256-128-hybrid-2nd-trainning.pt" # LOAD_DIR = "../PPO-Model/SmallArea-256-128-hybrid-2nd-trainning.pt"
# Shared round statistics, keyed by target type ("Go", "Attack", "Free").
# AimbotSideChannel.on_message_received increments these counters, and the
# training loop reads them to log the per-target win ratios.
TotalRounds = dict.fromkeys(("Go", "Attack", "Free"), 0)
WinRounds = dict.fromkeys(("Go", "Attack", "Free"), 0)
def parse_args(): def parse_args():
# fmt: off # fmt: off
@ -178,6 +192,51 @@ class PPOAgent(nn.Module):
self.critic(hidden), self.critic(hidden),
) )
class AimbotSideChannel(SideChannel):
    """Side channel exchanging messages with the Unity Aimbot environment.

    Incoming messages are ``|``-separated strings. A message of the form
    ``result|<target>|<outcome>`` updates the module-level ``TotalRounds``
    and ``WinRounds`` counters; a message whose first field is ``Error`` is
    printed. The ``send_*`` helpers queue outgoing messages of various types.
    """

    def __init__(self, channel_id: uuid.UUID) -> None:
        """Pass ``channel_id`` to the ``SideChannel`` base initializer."""
        super().__init__(channel_id)

    def on_message_received(self, msg: IncomingMessage) -> None:
        """Handle one message received from Unity.

        Note: this method of the SideChannel interface must be implemented
        to receive messages from Unity.

        A ``result`` message that has fewer than three fields, or that names
        a target missing from the counters, is reported and ignored instead
        of raising ``IndexError`` / ``KeyError``.
        """
        thisMessage = msg.read_string()
        print(thisMessage)
        thisResult = thisMessage.split("|")
        if thisResult[0] == "result":
            # Expected layout: result|<target>|<outcome>
            if len(thisResult) < 3:
                print(f"Malformed result message ignored: {thisMessage!r}")
                return
            target, outcome = thisResult[1], thisResult[2]
            if target not in TotalRounds or target not in WinRounds:
                print(f"Unknown target type ignored: {target!r}")
                return
            TotalRounds[target] += 1
            if outcome == "Win":
                WinRounds[target] += 1
            print(TotalRounds)
            print(WinRounds)
        elif thisResult[0] == "Error":
            # NOTE(review): the message was already printed above, so Error
            # messages appear twice in the output; kept as in the original.
            print(thisMessage)

    # Send helpers
    def send_string(self, data: str) -> None:
        """Send a string to C#."""
        msg = OutgoingMessage()
        msg.write_string(data)
        super().queue_message_to_send(msg)

    def send_bool(self, data: bool) -> None:
        """Queue an outgoing message containing a bool."""
        msg = OutgoingMessage()
        msg.write_bool(data)
        super().queue_message_to_send(msg)

    def send_int(self, data: int) -> None:
        """Queue an outgoing message containing a 32-bit int."""
        msg = OutgoingMessage()
        msg.write_int32(data)
        super().queue_message_to_send(msg)

    def send_float(self, data: float) -> None:
        """Queue an outgoing message containing a 32-bit float."""
        msg = OutgoingMessage()
        msg.write_float32(data)
        super().queue_message_to_send(msg)

    def send_float_list(self, data: List[float]) -> None:
        """Queue an outgoing message containing a list of 32-bit floats."""
        msg = OutgoingMessage()
        msg.write_float32_list(data)
        super().queue_message_to_send(msg)
if __name__ == "__main__": if __name__ == "__main__":
args = parse_args() args = parse_args()
@ -188,7 +247,8 @@ if __name__ == "__main__":
device = torch.device("cuda" if torch.cuda.is_available() and args.cuda else "cpu") device = torch.device("cuda" if torch.cuda.is_available() and args.cuda else "cpu")
# Initialize environment anget optimizer # Initialize environment anget optimizer
env = Aimbot(envPath=args.path, workerID=args.workerID, basePort=args.baseport) aimBotsideChannel = AimbotSideChannel(SIDE_CHANNEL_UUID);
env = Aimbot(envPath=args.path, workerID=args.workerID, basePort=args.baseport,side_channels=[aimBotsideChannel])
if args.load_dir is None: if args.load_dir is None:
agent = PPOAgent(env).to(device) agent = PPOAgent(env).to(device)
else: else:
@ -424,6 +484,9 @@ if __name__ == "__main__":
"charts/SPS", int(global_step / (time.time() - start_time)), global_step "charts/SPS", int(global_step / (time.time() - start_time)), global_step
) )
writer.add_scalar("charts/Reward", rewardsMean, global_step) writer.add_scalar("charts/Reward", rewardsMean, global_step)
writer.add_scalar("charts/GoWinRatio", WinRounds["Go"]/TotalRounds["Go"] if TotalRounds["Go"] != 0 else 0, global_step)
writer.add_scalar("charts/AttackWinRatio", WinRounds["Attack"]/TotalRounds["Attack"] if TotalRounds["Attack"] != 0 else 0, global_step)
writer.add_scalar("charts/FreeWinRatio", WinRounds["Free"]/TotalRounds["Free"] if TotalRounds["Free"] != 0 else 0, global_step)
if rewardsMean > bestReward: if rewardsMean > bestReward:
bestReward = rewardsMean bestReward = rewardsMean
saveDir = "../PPO-Model/bigArea-384-128-hybrid-" + str(rewardsMean) + ".pt" saveDir = "../PPO-Model/bigArea-384-128-hybrid-" + str(rewardsMean) + ".pt"