Compare commits
1 Commits
OffP-FullM
...
MultiThrea
Author | SHA1 | Date | |
---|---|---|---|
2ea8a5f104 |
@ -81,184 +81,43 @@
|
||||
"cell_type": "code",
|
||||
"execution_count": 1,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"import argparse\n",
|
||||
"import wandb\n",
|
||||
"import time\n",
|
||||
"import numpy as np\n",
|
||||
"import random\n",
|
||||
"import uuid\n",
|
||||
"import torch\n",
|
||||
"import torch.nn as nn\n",
|
||||
"import torch.optim as optim\n",
|
||||
"\n",
|
||||
"from AimbotEnv import Aimbot\n",
|
||||
"from tqdm import tqdm\n",
|
||||
"from torch.distributions.normal import Normal\n",
|
||||
"from torch.distributions.categorical import Categorical\n",
|
||||
"from distutils.util import strtobool\n",
|
||||
"from torch.utils.tensorboard import SummaryWriter\n",
|
||||
"from mlagents_envs.environment import UnityEnvironment\n",
|
||||
"from mlagents_envs.side_channel.side_channel import (\n",
|
||||
" SideChannel,\n",
|
||||
" IncomingMessage,\n",
|
||||
" OutgoingMessage,\n",
|
||||
")\n",
|
||||
"from typing import List\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 5,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"ename": "AttributeError",
|
||||
"evalue": "'aaa' object has no attribute 'outa'",
|
||||
"output_type": "error",
|
||||
"traceback": [
|
||||
"\u001b[1;31m---------------------------------------------------------------------------\u001b[0m",
|
||||
"\u001b[1;31mAttributeError\u001b[0m Traceback (most recent call last)",
|
||||
"Cell \u001b[1;32mIn[5], line 14\u001b[0m\n\u001b[0;32m 12\u001b[0m asd \u001b[39m=\u001b[39m aaa(outa, outb)\n\u001b[0;32m 13\u001b[0m asd\u001b[39m.\u001b[39mfunc()\n\u001b[1;32m---> 14\u001b[0m \u001b[39mprint\u001b[39m(asd\u001b[39m.\u001b[39;49mouta) \u001b[39m# 输出 100\u001b[39;00m\n",
|
||||
"\u001b[1;31mAttributeError\u001b[0m: 'aaa' object has no attribute 'outa'"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"class aaa():\n",
|
||||
" def __init__(self, a, b):\n",
|
||||
" self.a = a\n",
|
||||
" self.b = b\n",
|
||||
"\n",
|
||||
" def func(self):\n",
|
||||
" global outa\n",
|
||||
" outa = 100\n",
|
||||
"\n",
|
||||
"outa = 1\n",
|
||||
"outb = 2\n",
|
||||
"asd = aaa(outa, outb)\n",
|
||||
"asd.func()\n",
|
||||
"print(asd.outa) # 输出 100"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 2,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stderr",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"usage: ipykernel_launcher.py [-h] [--seed SEED]\n",
|
||||
"ipykernel_launcher.py: error: unrecognized arguments: --ip=127.0.0.1 --stdin=9003 --control=9001 --hb=9000 --Session.signature_scheme=\"hmac-sha256\" --Session.key=b\"46ef9317-59fb-4ab6-ae4e-6b35744fc423\" --shell=9002 --transport=\"tcp\" --iopub=9004 --f=c:\\Users\\UCUNI\\AppData\\Roaming\\jupyter\\runtime\\kernel-v2-311926K1uko38tdWb.json\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"ename": "SystemExit",
|
||||
"evalue": "2",
|
||||
"output_type": "error",
|
||||
"traceback": [
|
||||
"An exception has occurred, use %tb to see the full traceback.\n",
|
||||
"\u001b[1;31mSystemExit\u001b[0m\u001b[1;31m:\u001b[0m 2\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"import argparse\n",
|
||||
"\n",
|
||||
"def parse_args():\n",
|
||||
" parser = argparse.ArgumentParser()\n",
|
||||
" parser.add_argument(\"--seed\", type=int, default=11,\n",
|
||||
" help=\"seed of the experiment\")\n",
|
||||
" args = parser.parse_args()\n",
|
||||
" return args\n",
|
||||
"\n",
|
||||
"arggg = parse_args()\n",
|
||||
"print(type(arggg))"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 4,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"3"
|
||||
]
|
||||
},
|
||||
"execution_count": 4,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"y=\"a;b;c\"\n",
|
||||
"len(y.split(\";\"))"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 5,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"[2]\n"
|
||||
"0\n",
|
||||
"i = 0\n",
|
||||
"i = 1\n",
|
||||
"i = 2\n",
|
||||
"i = 3\n",
|
||||
"i = 4\n",
|
||||
"i = 5\n",
|
||||
"i = 6\n",
|
||||
"i = 7\n",
|
||||
"i = 8\n",
|
||||
"i = 9\n",
|
||||
"10\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"a = np.array([1,2,3,4])\n",
|
||||
"print(a[[False,True,False,False]])"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 16,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"{1, 2, 3, 4}"
|
||||
]
|
||||
},
|
||||
"execution_count": 16,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"a = {1,2,3}\n",
|
||||
"a.add(4)\n",
|
||||
"a"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 9,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"array([3, 4])"
|
||||
]
|
||||
},
|
||||
"execution_count": 9,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"a = np.array([[1,3],[2,4]])\n",
|
||||
"a.max(axis=1)\n"
|
||||
"import threading\n",
|
||||
"\n",
|
||||
"num = 0\n",
|
||||
"\n",
|
||||
"def print_numers():\n",
|
||||
" global num\n",
|
||||
" for i in range(10):\n",
|
||||
" num +=1\n",
|
||||
" print(\"i = \",i)\n",
|
||||
"\n",
|
||||
"thread = threading.Thread(target=print_numers)\n",
|
||||
"\n",
|
||||
"print(num)\n",
|
||||
"thread.start()\n",
|
||||
"thread.join()\n",
|
||||
"print(num)"
|
||||
]
|
||||
}
|
||||
],
|
||||
|
@ -19,8 +19,8 @@ import torch.optim as optim
|
||||
# side channel uuid
|
||||
SIDE_CHANNEL_UUID = uuid.UUID("8bbfb62a-99b4-457c-879d-b78b69066b5e")
|
||||
# tensorboard names
|
||||
GAME_NAME = "Aimbot_Hybrid_Full_MNN_MultiLevel"
|
||||
GAME_TYPE = "GotoOnly-Level2345"
|
||||
GAME_NAME = "Aimbot_Hybrid_Full_MNN_MultiLevel_V2"
|
||||
GAME_TYPE = "GotoOnly-Level0123-new512Model"
|
||||
|
||||
if __name__ == "__main__":
|
||||
args = parse_args()
|
||||
|
@ -4,7 +4,7 @@ import uuid
|
||||
from distutils.util import strtobool
|
||||
|
||||
DEFAULT_SEED = 9331
|
||||
ENV_PATH = "../Build/3.4/Aimbot-ParallelEnv"
|
||||
ENV_PATH = "../Build/3.5/Aimbot-ParallelEnv"
|
||||
WAND_ENTITY = "koha9"
|
||||
WORKER_ID = 1
|
||||
BASE_PORT = 1000
|
||||
@ -22,9 +22,9 @@ GAE_LAMBDA = 0.95
|
||||
EPOCHS = 3
|
||||
CLIP_COEF = 0.11
|
||||
LOSS_COEF = [1.0, 1.0, 1.0, 1.0] # free go attack defence
|
||||
POLICY_COEF = [1.0, 1.0, 1.0, 1.0]
|
||||
POLICY_COEF = [0.8, 0.8, 0.8, 0.8]
|
||||
ENTROPY_COEF = [0.05, 0.05, 0.05, 0.05]
|
||||
CRITIC_COEF = [0.5, 0.5, 0.5, 0.5]
|
||||
CRITIC_COEF = [0.8, 0.8, 0.8, 0.8]
|
||||
TARGET_LEARNING_RATE = 1e-6
|
||||
|
||||
FREEZE_VIEW_NETWORK = False
|
||||
@ -35,7 +35,7 @@ TRAIN = True
|
||||
SAVE_MODEL = True
|
||||
WANDB_TACK = True
|
||||
LOAD_DIR = None
|
||||
LOAD_DIR = "../PPO-Model/GotoOnly-Level1234_9331_1697122986/8.853553.pt"
|
||||
# LOAD_DIR = "../PPO-Model/GotoOnly-Level0123_9331_1696965321/5.1035867.pt"
|
||||
|
||||
# Unity Environment Parameters
|
||||
TARGET_STATE_SIZE = 6
|
||||
|
255
Aimbot-PPO-Python/Pytorch/multiThread-PPO.py
Normal file
255
Aimbot-PPO-Python/Pytorch/multiThread-PPO.py
Normal file
@ -0,0 +1,255 @@
|
||||
import time
|
||||
import numpy as np
|
||||
import random
|
||||
import uuid
|
||||
import torch
|
||||
import atexit
|
||||
import os
|
||||
|
||||
from aimbotEnv import Aimbot
|
||||
from aimbotEnv import AimbotSideChannel
|
||||
from ppoagent import PPOAgent
|
||||
from airecorder import WandbRecorder
|
||||
from aimemory import PPOMem
|
||||
from aimemory import Targets
|
||||
from arguments import parse_args
|
||||
from arguments import set_save_model, is_save_model
|
||||
import torch.optim as optim
|
||||
|
||||
# UUID of the side channel handed to AimbotSideChannel below.
SIDE_CHANNEL_UUID = uuid.UUID("8bbfb62a-99b4-457c-879d-b78b69066b5e")
# Names passed to the recorder; GAME_TYPE is also the prefix of the run name.
GAME_NAME = "Aimbot_Hybrid_Full_MNN_MultiLevel_V2"
GAME_TYPE = "GotoOnly-Level0123-new512Model"

if __name__ == "__main__":
    # Script entry point: seed the RNGs, build the environment / agent /
    # optimizer / recorder, then alternate between collecting transitions and
    # training until `total_update_step` update cycles have run.
    args = parse_args()
    random.seed(args.seed)
    np.random.seed(args.seed)
    torch.manual_seed(args.seed)

    device = torch.device("cuda" if torch.cuda.is_available() and args.cuda else "cpu")
    best_reward = -1

    # Initialize environment, agent and optimizer.
    aimbot_side_channel = AimbotSideChannel(SIDE_CHANNEL_UUID)
    env = Aimbot(
        env_path=args.path,
        worker_id=args.workerID,
        base_port=args.baseport,
        side_channels=[aimbot_side_channel],
    )
    if args.load_dir is None:
        agent = PPOAgent(
            env=env,
            this_args=args,
            device=device,
        ).to(device)
    else:
        # NOTE(review): torch.load unpickles the whole agent object; only load
        # checkpoints from a trusted source.
        agent = torch.load(args.load_dir)
        if args.freeze_viewnet:
            # Freezing the view network is rejected for this model layout.
            print("FREEZE VIEW NETWORK is not compatible with Full MNN!")
            raise NotImplementedError
        print("Load Agent", args.load_dir)
        print(agent.eval())
    optimizer = optim.Adam(agent.parameters(), lr=args.lr, eps=1e-5)

    # Tensorboard and WandB recorder.
    run_name = f"{GAME_TYPE}_{args.seed}_{int(time.time())}"
    wdb_recorder = WandbRecorder(GAME_NAME, GAME_TYPE, run_name, args)
    # Every checkpoint of this run is written below this directory.
    save_dir = "../PPO-Model/" + run_name + "/"

    try:
        # Start the game.
        total_update_step = args.target_num * args.total_timesteps // args.datasetSize
        target_steps = [0] * args.target_num  # per-target count of processed datasets
        start_time = time.time()
        state, _, done = env.reset()

        # Initialize the transition buffers.
        ppo_memories = PPOMem(
            args=args,
            unity_agent_num=env.unity_agent_num,
            device=device,
        )

        # OUTER LOOP: one iteration = collect until a dataset is full, then train.
        for total_steps in range(total_update_step):
            if args.annealLR:
                # Linear decay; the rate reaches 0 on the last update cycle.
                # NOTE(review): args.target_lr is not applied here -- confirm
                # whether the decay was meant to stop at that value.
                frac = 1.0 - ((total_steps + 1.0) / total_update_step)
                lr_now = frac * args.lr
                optimizer.param_groups[0]["lr"] = lr_now
            else:
                lr_now = args.lr

            # Show the learning rate at the start of every update cycle.
            print("new episode", total_steps, "learning rate = ", lr_now)
            step = 0
            train_queue = []  # indices of targets whose dataset is ready to train
            last_reward = [0.0] * env.unity_agent_num

            # INNER LOOP: run the agent in the environment.
            # NOTE(review): the only exit is a non-empty train_queue, which is
            # filled only when args.train is set -- confirm how evaluation
            # runs are expected to terminate.
            while True:
                # Target type (state[0][0]) is stay (4): send the all-zero action.
                if state[0][0] == 4:
                    next_state, reward, next_done = env.step(env.all_zero_action)
                    state, done = next_state, next_done
                    continue

                if step % args.decision_period == 0:
                    # Decision point: ask the agent for a new action.
                    step += 1
                    with torch.no_grad():
                        action, dis_logprob, _, con_logprob, _, value = agent.get_actions_value(
                            torch.tensor(state, dtype=torch.float32).to(device)
                        )
                        value = value.flatten()

                    # Move the results to the CPU as numpy arrays.
                    action_cpu = action.cpu().numpy()
                    dis_logprob_cpu = dis_logprob.cpu().numpy()
                    con_logprob_cpu = con_logprob.cpu().numpy()
                    value_cpu = value.cpu().numpy()

                    # Environment step.
                    next_state, reward, next_done = env.step(action_cpu)

                    if args.train:
                        ppo_memories.save_memories(
                            now_step=step,
                            agent=agent,
                            state=state,
                            action_cpu=action_cpu,
                            dis_logprob_cpu=dis_logprob_cpu,
                            con_logprob_cpu=con_logprob_cpu,
                            reward=reward,
                            done=done,
                            value_cpu=value_cpu,
                            last_reward=last_reward,
                            next_done=next_done,
                            next_state=next_state,
                        )
                        # Collect every target whose dataset reached datasetSize.
                        train_queue = [
                            i
                            for i in range(args.target_num)
                            if ppo_memories.obs[i].size()[0] >= args.datasetSize
                        ]

                    # Advance the state BEFORE leaving the loop so the next
                    # update cycle starts from the observation the environment
                    # actually returned, not from the previous one.
                    state, done = next_state, next_done
                    if train_queue:
                        # Leave the rollout loop and start training.
                        break
                else:
                    # Between decision points: repeat the last predicted action.
                    step += 1
                    next_state, reward, next_done = env.step(action_cpu)
                    if args.train:
                        ppo_memories.save_memories(
                            now_step=step,
                            agent=agent,
                            state=state,
                            action_cpu=action_cpu,
                            dis_logprob_cpu=dis_logprob_cpu,
                            con_logprob_cpu=con_logprob_cpu,
                            reward=reward,
                            done=done,
                            value_cpu=value_cpu,
                            last_reward=last_reward,
                            next_done=next_done,
                            next_state=next_state,
                        )
                    # NOTE(review): `done` is not refreshed on this branch --
                    # confirm that this is intended.
                    state = next_state
                    last_reward = reward

            mean_reward_list = []  # per-target mean rewards, for the global chart
            if args.train:
                # Train mode on: train once for every queued target.
                for this_train_ind in train_queue:
                    start_time = time.time()
                    target_steps[this_train_ind] += 1
                    (
                        v_loss,
                        dis_pg_loss,
                        con_pg_loss,
                        loss,
                        entropy_loss,
                    ) = agent.train_net(
                        this_train_ind=this_train_ind,
                        ppo_memories=ppo_memories,
                        optimizer=optimizer,
                    )
                    print("done")
                    # Record the mean reward before the history is cleared.
                    target_reward_mean = np.mean(
                        ppo_memories.rewards[this_train_ind].to("cpu").detach().numpy().copy()
                    )
                    mean_reward_list.append(target_reward_mean)
                    target_name = Targets(this_train_ind).name

                    # Clear this target's training buffer.
                    ppo_memories.clear_training_datasets(this_train_ind)
                    # Record losses and rewards for plotting.
                    wdb_recorder.add_target_scalar(
                        target_name,
                        this_train_ind,
                        v_loss,
                        dis_pg_loss,
                        con_pg_loss,
                        loss,
                        entropy_loss,
                        target_reward_mean,
                        target_steps,
                    )
                    print(f"episode over Target{target_name} mean reward:", target_reward_mean)

                total_reward_mean = np.mean(mean_reward_list)
                wdb_recorder.add_global_scalar(
                    total_reward_mean,
                    optimizer.param_groups[0]["lr"],
                    total_steps,
                )
                # start_time was reset inside the loop above, so this is the
                # duration of the last queued target's training only.
                print("cost time:", time.time() - start_time)

                # Save on a new best mean reward, or when a save was requested.
                if (is_save_model() or total_reward_mean > best_reward) and args.save_model:
                    # makedirs also creates a missing parent directory and
                    # tolerates an already existing one.
                    os.makedirs(save_dir, exist_ok=True)
                    best_reward = total_reward_mean
                    torch.save(agent, save_dir + str(total_reward_mean) + ".pt")
                    print("Model Saved!")
                    set_save_model(False)
            else:
                # Train mode off: only log rewards and clear the buffers.
                for this_train_ind in train_queue:
                    target_steps[this_train_ind] += 1
                    target_name = Targets(this_train_ind).name
                    target_reward_mean = np.mean(
                        ppo_memories.rewards[this_train_ind].to("cpu").detach().numpy().copy()
                    )
                    mean_reward_list.append(target_reward_mean)
                    print(target_steps[this_train_ind])

                    # Clear this target's training buffer.
                    ppo_memories.clear_training_datasets(this_train_ind)

                    # Record rewards for plotting.
                    wdb_recorder.writer.add_scalar(
                        f"Target{target_name}/Reward",
                        target_reward_mean,
                        target_steps[this_train_ind],
                    )
                    wdb_recorder.add_win_ratio(target_name, target_steps[this_train_ind])
                    print(f"episode over Target{target_name} mean reward:", target_reward_mean)
                total_reward_mean = np.mean(mean_reward_list)
                wdb_recorder.writer.add_scalar(
                    "GlobalCharts/TotalRewardMean", total_reward_mean, total_steps
                )

        # Always keep the final agent, whatever its reward was.
        os.makedirs(save_dir, exist_ok=True)
        torch.save(agent, save_dir + "_last.pt")
    finally:
        # Release the environment and the recorder writer even when the run
        # is aborted by an exception.
        env.close()
        wdb_recorder.writer.close()
|
@ -8,6 +8,8 @@ from aimbotEnv import Aimbot
|
||||
from torch.distributions.normal import Normal
|
||||
from torch.distributions.categorical import Categorical
|
||||
|
||||
firstLayerNum = 512
|
||||
secondLayerNum = 128
|
||||
|
||||
def layer_init(layer, std=np.sqrt(2), bias_const=0.0):
|
||||
nn.init.orthogonal_(layer.weight, std)
|
||||
@ -49,9 +51,9 @@ class PPOAgent(nn.Module):
|
||||
self.hidden_networks = nn.ModuleList(
|
||||
[
|
||||
nn.Sequential(
|
||||
layer_init(nn.Linear(self.state_size, 256)),
|
||||
layer_init(nn.Linear(self.state_size, firstLayerNum)),
|
||||
nn.LeakyReLU(),
|
||||
layer_init(nn.Linear(256, 128)),
|
||||
layer_init(nn.Linear(firstLayerNum, secondLayerNum)),
|
||||
nn.LeakyReLU(),
|
||||
)
|
||||
for i in range(self.target_num)
|
||||
@ -59,16 +61,16 @@ class PPOAgent(nn.Module):
|
||||
)
|
||||
|
||||
self.actor_dis = nn.ModuleList(
|
||||
[layer_init(nn.Linear(128, self.discrete_size), std=0.5) for i in range(self.target_num)]
|
||||
[layer_init(nn.Linear(secondLayerNum, self.discrete_size), std=0.5) for i in range(self.target_num)]
|
||||
)
|
||||
self.actor_mean = nn.ModuleList(
|
||||
[layer_init(nn.Linear(128, self.continuous_size), std=0) for i in range(self.target_num)]
|
||||
[layer_init(nn.Linear(secondLayerNum, self.continuous_size), std=0) for i in range(self.target_num)]
|
||||
)
|
||||
self.actor_logstd = nn.ParameterList(
|
||||
[nn.Parameter(torch.zeros(1, self.continuous_size)) for i in range(self.target_num)]
|
||||
)
|
||||
self.critic = nn.ModuleList(
|
||||
[layer_init(nn.Linear(128, 1), std=0) for i in range(self.target_num)]
|
||||
[layer_init(nn.Linear(secondLayerNum, 1), std=0) for i in range(self.target_num)]
|
||||
)
|
||||
|
||||
def get_value(self, state: torch.Tensor):
|
||||
|
Loading…
Reference in New Issue
Block a user