diff --git a/Aimbot-PPO-Python/Pytorch/AimbotEnv.py b/Aimbot-PPO-Python/Pytorch/AimbotEnv.py
index 6f97cca..8f5bd46 100644
--- a/Aimbot-PPO-Python/Pytorch/AimbotEnv.py
+++ b/Aimbot-PPO-Python/Pytorch/AimbotEnv.py
@@ -11,6 +11,7 @@ from mlagents_envs.side_channel.side_channel import (
     IncomingMessage,
     OutgoingMessage,
 )
+from arguments import set_save_model
 
 
 class Aimbot(gym.Env):
@@ -176,18 +177,21 @@ class AimbotSideChannel(SideChannel):
             "Warning|Message1|Message2|Message3" or "Error|Message1|Message2|Message3"
         """
-        this_message = msg.read_string()
-        this_result = this_message.split("|")
-        print(this_result)
-        if this_result[0] == "Warning":
-            if this_result[1] == "Result":
-                airecorder.total_rounds[this_result[2]] += 1
-                if this_result[3] == "Win":
-                    airecorder.win_rounds[this_result[2]] += 1
+        this_message_Original = msg.read_string()
+        this_message = this_message_Original.split("|")
+        print(this_message)
+        if this_message[0] == "Warning":
+            if this_message[1] == "Result":
+                airecorder.total_rounds[this_message[2]] += 1
+                if this_message[3] == "Win":
+                    airecorder.win_rounds[this_message[2]] += 1
             # print(TotalRounds)
             # print(WinRounds)
-        elif this_result[0] == "Error":
-            print(this_message)
+            if this_message[1] == "Command":
+                set_save_model(True)
+                print("Command: " + this_message_Original)
+        elif this_message[0] == "Error":
+            print(this_message_Original)
         # # while Message type is Warning
         # if(thisResult[0] == "Warning"):
         #     # while Message1 is result means one game is over
diff --git a/Aimbot-PPO-Python/Pytorch/Archive/test2.ipynb b/Aimbot-PPO-Python/Pytorch/Archive/test2.ipynb
index f4a4a60..60f7284 100644
--- a/Aimbot-PPO-Python/Pytorch/Archive/test2.ipynb
+++ b/Aimbot-PPO-Python/Pytorch/Archive/test2.ipynb
@@ -181,30 +181,84 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 6,
+   "execution_count": 4,
    "metadata": {},
    "outputs": [
     {
      "data": {
       "text/plain": [
-       "array([[0., 0., 0., 0.],\n",
-       "       [0., 0., 0., 0.],\n",
-       "       [0., 0., 0., 0.],\n",
-       "       [0., 0., 0., 0.],\n",
-       "       [0., 0., 0., 0.],\n",
-       "       [0., 0., 0., 0.],\n",
-       "       [0., 0., 0., 0.],\n",
-       "       [0., 0., 0., 0.]])"
+       "3"
       ]
      },
-     "execution_count": 6,
+     "execution_count": 4,
      "metadata": {},
      "output_type": "execute_result"
     }
    ],
    "source": [
-    "import numpy as np\n",
-    "np.zeros((8, 4))"
+    "y=\"a;b;c\"\n",
+    "len(y.split(\";\"))"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 5,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "[2]\n"
+     ]
+    }
+   ],
+   "source": [
+    "a = np.array([1,2,3,4])\n",
+    "print(a[[False,True,False,False]])"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 16,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "{1, 2, 3, 4}"
+      ]
+     },
+     "execution_count": 16,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "a = {1,2,3}\n",
+    "a.add(4)\n",
+    "a"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 9,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "array([3, 4])"
+      ]
+     },
+     "execution_count": 9,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "a = np.array([[1,3],[2,4]])\n",
+    "a.max(axis=1)\n"
    ]
   }
  ],
diff --git a/Aimbot-PPO-Python/Pytorch/MultiNN-PPO.py b/Aimbot-PPO-Python/Pytorch/MultiNN-PPO.py
index f32a1bd..84c56af 100644
--- a/Aimbot-PPO-Python/Pytorch/MultiNN-PPO.py
+++ b/Aimbot-PPO-Python/Pytorch/MultiNN-PPO.py
@@ -4,6 +4,7 @@ import random
 import uuid
 import torch
 import atexit
+import os
 
 from aimbotEnv import Aimbot
 from aimbotEnv import AimbotSideChannel
@@ -12,13 +13,14 @@ from airecorder import WandbRecorder
 from aimemory import PPOMem
 from aimemory import Targets
 from arguments import parse_args
+from arguments import set_save_model, is_save_model
 import torch.optim as optim
 
 # side channel uuid
 SIDE_CHANNEL_UUID = uuid.UUID("8bbfb62a-99b4-457c-879d-b78b69066b5e")
 # tensorboard names
-GAME_NAME = "Aimbot_Hybrid_V3"
-GAME_TYPE = "Mix_Verification"
+GAME_NAME = "Aimbot_Hybrid_Full_MNN_MultiLevel"
+GAME_TYPE = "GotoOnly-Level2345"
 
 if __name__ == "__main__":
     args = parse_args()
@@ -57,16 +59,6 @@
     run_name = f"{GAME_TYPE}_{args.seed}_{int(time.time())}"
     wdb_recorder = WandbRecorder(GAME_NAME, GAME_TYPE, run_name, args)
 
-    @atexit.register
-    def save_model():
-        # close env
-        env.close()
-        if args.save_model:
-            # save model while exit
-            save_dir = "../PPO-Model/" + run_name + "_last.pt"
-            torch.save(agent, save_dir)
-            print("save model to " + save_dir)
-
     # start the game
     total_update_step = args.target_num * args.total_timesteps // args.datasetSize
     target_steps = [0 for i in range(args.target_num)]
@@ -222,11 +214,16 @@
             )
             # print cost time as seconds
             print("cost time:", time.time() - start_time)
-            # New Record!
-            if TotalRewardMean > best_reward and args.save_model:
-                best_reward = target_reward_mean
-                saveDir = "../PPO-Model/" + run_name + "_" + str(TotalRewardMean) + ".pt"
-                torch.save(agent, saveDir)
+            # New Record! or save model
+            if ((is_save_model() or TotalRewardMean > best_reward) and args.save_model):
+                # check saveDir is exist
+                saveDir = "../PPO-Model/" + run_name + "/"
+                if not os.path.isdir(saveDir):
+                    os.mkdir(saveDir)
+                best_reward = TotalRewardMean
+                torch.save(agent, saveDir + str(TotalRewardMean) + ".pt")
+                print("Model Saved!")
+                set_save_model(False)
         else:
             # train mode off
             mean_reward_list = []  # for WANDB
@@ -249,7 +246,10 @@
 
     TotalRewardMean = np.mean(mean_reward_list)
     wdb_recorder.writer.add_scalar("GlobalCharts/TotalRewardMean", TotalRewardMean, total_steps)
-    saveDir = "../PPO-Model/" + run_name + "_last.pt"
-    torch.save(agent, saveDir)
+    saveDir = "../PPO-Model/" + run_name + "/"
+    if not os.path.isdir(saveDir):
+        os.mkdir(saveDir)
+    best_reward = target_reward_mean
+    torch.save(agent, saveDir + "_last.pt")
     env.close()
     wdb_recorder.writer.close()
diff --git a/Aimbot-PPO-Python/Pytorch/arguments.py b/Aimbot-PPO-Python/Pytorch/arguments.py
index a7b78cb..bcfe8fb 100644
--- a/Aimbot-PPO-Python/Pytorch/arguments.py
+++ b/Aimbot-PPO-Python/Pytorch/arguments.py
@@ -4,21 +4,19 @@ import uuid
 from distutils.util import strtobool
 
 DEFAULT_SEED = 9331
-ENV_PATH = "../Build/3.1.6/Aimbot-ParallelEnv"
+ENV_PATH = "../Build/3.4/Aimbot-ParallelEnv"
 WAND_ENTITY = "koha9"
 WORKER_ID = 1
 BASE_PORT = 1000
 
 # tensorboard names
-GAME_NAME = "Aimbot_Target_Hybrid_Full_MNN_V1"
-GAME_TYPE = "Mix_Train"
 
 # max round steps per agent is 2500/Decision_period, 25 seconds
 TOTAL_STEPS = 3150000
 BATCH_SIZE = 512
 MAX_TRAINNING_DATASETS = 6000
 DECISION_PERIOD = 1
-LEARNING_RATE = 6.5e-4
+LEARNING_RATE = 1.5e-4
 GAMMA = 0.99
 GAE_LAMBDA = 0.95
 EPOCHS = 3
@@ -27,18 +25,17 @@ LOSS_COEF = [1.0, 1.0, 1.0, 1.0] # free go attack defence
 POLICY_COEF = [1.0, 1.0, 1.0, 1.0]
 ENTROPY_COEF = [0.05, 0.05, 0.05, 0.05]
 CRITIC_COEF = [0.5, 0.5, 0.5, 0.5]
-TARGET_LEARNING_RATE = 1e-5
+TARGET_LEARNING_RATE = 1e-6
 FREEZE_VIEW_NETWORK = False
-BROADCASTREWARD = False
 ANNEAL_LEARNING_RATE = True
 CLIP_VLOSS = True
 NORM_ADV = False
 TRAIN = True
-SAVE_MODEL = False
-WANDB_TACK = False
+SAVE_MODEL = True
+WANDB_TACK = True
 LOAD_DIR = None
-#LOAD_DIR = "../PPO-Model/PList_Go_LeakyReLU_9331_1677965178_bestGoto/PList_Go_LeakyReLU_9331_1677965178_10.709002.pt"
+LOAD_DIR = "../PPO-Model/GotoOnly-Level1234_9331_1697122986/8.853553.pt"
 
 # Unity Environment Parameters
 TARGET_STATE_SIZE = 6
@@ -53,6 +50,16 @@ TARGETNUM= 4
 ENV_TIMELIMIT = 30
 RESULT_BROADCAST_RATIO = 1/ENV_TIMELIMIT
 
+save_model_this_episode = False
+
+def is_save_model():
+    global save_model_this_episode
+    return save_model_this_episode
+
+def set_save_model(save_model:bool):
+    print("set save model to ",save_model)
+    global save_model_this_episode
+    save_model_this_episode = save_model
+
 def parse_args():
     # fmt: off
     # pytorch and environment parameters
@@ -97,12 +104,10 @@ def parse_args():
         help="the number of steps to run in each environment per policy rollout")
     parser.add_argument("--result-broadcast-ratio", type=float, default=RESULT_BROADCAST_RATIO,
         help="broadcast result when win round is reached,r=result-broadcast-ratio*remainTime")
-    parser.add_argument("--broadCastEndReward", type=lambda x: bool(strtobool(x)), default=BROADCASTREWARD, nargs="?", const=True,
-        help="save model or not")
     # target_learning_rate
     parser.add_argument("--target-lr", type=float, default=TARGET_LEARNING_RATE,
         help="target value of downscaling the learning rate")
-
+    
     # POLICY_COEF ENTROPY_COEF CRITIC_COEF LOSS_COEF
     parser.add_argument("--policy-coef", type=float, default=POLICY_COEF,
         help="coefficient of the policy loss")
diff --git a/Aimbot-PPO-Python/Pytorch/ppoagent.py b/Aimbot-PPO-Python/Pytorch/ppoagent.py
index aa71166..74232a7 100644
--- a/Aimbot-PPO-Python/Pytorch/ppoagent.py
+++ b/Aimbot-PPO-Python/Pytorch/ppoagent.py
@@ -49,9 +49,9 @@ class PPOAgent(nn.Module):
         self.hidden_networks = nn.ModuleList(
             [
                 nn.Sequential(
-                    layer_init(nn.Linear(self.state_size, 128)),
+                    layer_init(nn.Linear(self.state_size, 256)),
                     nn.LeakyReLU(),
-                    layer_init(nn.Linear(128, 64)),
+                    layer_init(nn.Linear(256, 128)),
                     nn.LeakyReLU(),
                 )
                 for i in range(self.target_num)
@@ -59,16 +59,16 @@ class PPOAgent(nn.Module):
         )
         self.actor_dis = nn.ModuleList(
-            [layer_init(nn.Linear(64, self.discrete_size), std=0.5) for i in range(self.target_num)]
+            [layer_init(nn.Linear(128, self.discrete_size), std=0.5) for i in range(self.target_num)]
         )
         self.actor_mean = nn.ModuleList(
-            [layer_init(nn.Linear(64, self.continuous_size), std=0.5) for i in range(self.target_num)]
+            [layer_init(nn.Linear(128, self.continuous_size), std=0) for i in range(self.target_num)]
         )
         self.actor_logstd = nn.ParameterList(
             [nn.Parameter(torch.zeros(1, self.continuous_size)) for i in range(self.target_num)]
         )
         self.critic = nn.ModuleList(
-            [layer_init(nn.Linear(64, 1), std=1) for i in range(self.target_num)]
+            [layer_init(nn.Linear(128, 1), std=0) for i in range(self.target_num)]
         )
 
     def get_value(self, state: torch.Tensor):
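
Usage note (not part of the patch): a minimal sketch of the save-on-command flow these changes introduce. The side channel raises the module-level flag via set_save_model, and the training loop checks is_save_model, saves a checkpoint, and lowers the flag again. The helper names below and the os.makedirs call are illustrative assumptions, not code from the repository.

# Hypothetical sketch of the new save-model flag flow; `agent` and `run_name`
# are placeholders for the objects used in MultiNN-PPO.py.
import os
import torch

from arguments import is_save_model, set_save_model


def handle_side_channel_message(message: str) -> None:
    # AimbotSideChannel.on_message_received splits "Warning|Command|..." and
    # raises the flag instead of saving directly from the callback.
    parts = message.split("|")
    if parts[0] == "Warning" and len(parts) > 1 and parts[1] == "Command":
        set_save_model(True)


def maybe_save(agent: torch.nn.Module, run_name: str,
               reward_mean: float, best_reward: float) -> float:
    # Mirrors the saving block in MultiNN-PPO.py: save when the flag is up or a
    # new best reward is reached, then lower the flag for the next episode.
    if is_save_model() or reward_mean > best_reward:
        save_dir = "../PPO-Model/" + run_name + "/"
        os.makedirs(save_dir, exist_ok=True)  # the patch itself uses os.mkdir
        torch.save(agent, save_dir + str(reward_mean) + ".pt")
        best_reward = max(best_reward, reward_mean)
        set_save_model(False)
    return best_reward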