Clean up unused variables; adapt to environment build 3.6
parent 5aa7e0936a
commit 9d9524429c
@@ -20,7 +20,7 @@ import torch.optim as optim
 SIDE_CHANNEL_UUID = uuid.UUID("8bbfb62a-99b4-457c-879d-b78b69066b5e")
 # tensorboard names
 GAME_NAME = "Aimbot_Hybrid_Full_MNN_MultiLevel"
-GAME_TYPE = "GotoOnly-Level0123-newModel"
+GAME_TYPE = "GotoOnly-3.6-Level0123-newModel-Onehot"
 
 if __name__ == "__main__":
     args = parse_args()

@@ -58,6 +58,7 @@ class PPOMem:
             # print("Win! Broadcast reward!",rewardBF[-1])
             print(sum(thisRewardBF) / len(thisRewardBF))
             thisRewardBF[-1] = rewardBF[-1] - self.base_win_reward
+            # broadcast result reward, increase all reward in this round by remainTime * self.result_broadcast_ratio
             thisRewardBF = (np.asarray(thisRewardBF) + (remainTime * self.result_broadcast_ratio)).tolist()
         else:
             print("!!!!!DIDNT GET RESULT REWARD!!!!!!", rewardBF[-1])

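For context, a minimal standalone sketch of what the win branch above does once the newly documented broadcast step runs: the flat win bonus is removed from the final step, then remainTime * result_broadcast_ratio is added to every step of the round. The function name and plain arguments below are illustrative, not the class method itself.

import numpy as np

def broadcast_end_reward(rewardBF: list, remainTime: float,
                         base_win_reward: float, result_broadcast_ratio: float) -> list:
    thisRewardBF = list(rewardBF)
    # drop the flat win bonus from the last step of the round ...
    thisRewardBF[-1] = rewardBF[-1] - base_win_reward
    # ... then spread remainTime * result_broadcast_ratio across every step
    return (np.asarray(thisRewardBF) + remainTime * result_broadcast_ratio).tolist()

For example, broadcast_end_reward([1.0, 0.5, 10.0], remainTime=20.0, base_win_reward=10.0, result_broadcast_ratio=0.1) returns [3.0, 2.5, 2.0].
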
@@ -88,7 +89,7 @@ class PPOMem:
            self.dones_bf[i].append(done[i])
            self.values_bf[i].append(value_cpu[i])
            if now_step % self.decision_period == 0:
-               # on decision period, add last skiped round's reward
+               # on decision period, add last skiped round's reward, only affact in decision_period != 1
                self.rewards_bf[i].append(reward[i] + last_reward[i])
            else:
                # not on decision period, only add this round's reward

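The reworded comment above is about frame skipping: the last_reward term only matters when decision_period != 1, i.e. when rewards collected on skipped frames have to be carried into the next decision step. A minimal sketch of that rule for a single agent (the function name and scalar arguments are illustrative):

def accumulate_reward(now_step: int, decision_period: int,
                      reward: float, last_reward: float) -> float:
    # on a decision step, fold in the reward carried over from skipped frames;
    # with decision_period == 1 every step is a decision step and last_reward stays 0
    if now_step % decision_period == 0:
        return reward + last_reward
    return reward
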
@@ -4,7 +4,7 @@ import uuid
 from distutils.util import strtobool
 
 DEFAULT_SEED = 9331
-ENV_PATH = "../Build/3.4/Aimbot-ParallelEnv"
+ENV_PATH = "../Build/3.6/Aimbot-ParallelEnv"
 WAND_ENTITY = "koha9"
 WORKER_ID = 1
 BASE_PORT = 1000

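parse_args() itself is not part of this diff; the sketch below only illustrates, as an assumption, how module constants like DEFAULT_SEED and ENV_PATH (in scope from the hunk above) are commonly wired into argparse together with the imported strtobool. All flag names here are hypothetical.

import argparse
from distutils.util import strtobool

def parse_args():
    parser = argparse.ArgumentParser()
    parser.add_argument("--seed", type=int, default=DEFAULT_SEED)
    parser.add_argument("--path", type=str, default=ENV_PATH)
    parser.add_argument("--workerID", type=int, default=WORKER_ID)
    parser.add_argument("--baseport", type=int, default=BASE_PORT)
    # strtobool turns "true"/"false" command-line strings into booleans
    parser.add_argument("--train", type=lambda x: bool(strtobool(x)), default=True)
    return parser.parse_args()
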
@@ -16,19 +16,19 @@ TOTAL_STEPS = 3150000
 BATCH_SIZE = 512
 MAX_TRAINNING_DATASETS = 6000
 DECISION_PERIOD = 1
-LEARNING_RATE = 1.5e-4
-GAMMA = 0.99
+LEARNING_RATE = 5e-5
+GAMMA = 0.999
 GAE_LAMBDA = 0.95
 EPOCHS = 3
 CLIP_COEF = 0.11
 LOSS_COEF = [1.0, 1.0, 1.0, 1.0] # free go attack defence
-POLICY_COEF = [1.0, 1.0, 1.0, 1.0]
+POLICY_COEF = [0.8, 0.8, 0.8, 0.8]
 ENTROPY_COEF = [0.05, 0.05, 0.05, 0.05]
-CRITIC_COEF = [0.8, 0.8, 0.8, 0.8]
+CRITIC_COEF = [1.0, 1.0, 1.0, 1.0]
 TARGET_LEARNING_RATE = 1e-6
 
 FREEZE_VIEW_NETWORK = False
-ANNEAL_LEARNING_RATE = True
+ANNEAL_LEARNING_RATE = False
 CLIP_VLOSS = True
 NORM_ADV = False
 TRAIN = True

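How the four coefficient lists are combined is not shown in this diff; the sketch below assumes the usual PPO-style weighting, with t indexing the per-target heads (free/go/attack/defence) and the constants above in scope. It is an assumption about the loss, not the repository's exact code.

import torch

def weighted_loss(t: int, pg_loss: torch.Tensor, v_loss: torch.Tensor,
                  entropy: torch.Tensor) -> torch.Tensor:
    # assumed combination: clipped policy loss plus value loss minus an entropy bonus,
    # each scaled by its per-target coefficient, the whole term scaled by LOSS_COEF[t]
    return LOSS_COEF[t] * (
        POLICY_COEF[t] * pg_loss
        + CRITIC_COEF[t] * v_loss
        - ENTROPY_COEF[t] * entropy
    )

Read against the old values, this commit shifts weight from the policy term (1.0 to 0.8) toward the critic term (0.8 to 1.0), and holds the learning rate at 5e-5 instead of annealing from 1.5e-4.
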
@@ -14,6 +14,8 @@ def layer_init(layer, std=np.sqrt(2), bias_const=0.0):
     nn.init.constant_(layer.bias, bias_const)
     return layer
 
+neural_size_1 = 400
+neural_size_2 = 300
 
 class PPOAgent(nn.Module):
     def __init__(
@@ -31,15 +33,6 @@ class PPOAgent(nn.Module):
         self.unity_action_size = env.unity_action_size
         self.state_size = self.unity_observation_shape[0]
         self.agent_num = env.unity_agent_num
-        self.target_size = self.args.target_state_size
-        self.time_state_size = self.args.time_state_size
-        self.gun_state_size = self.args.gun_state_size
-        self.my_state_size = self.args.my_state_size
-        self.ray_state_size = env.unity_observation_shape[0] - self.args.total_target_size
-        self.state_size_without_ray = self.args.total_target_size
-        self.head_input_size = (
-            env.unity_observation_shape[0] - self.target_size - self.time_state_size - self.gun_state_size
-        ) # except target state input
 
         self.unity_discrete_type = env.unity_discrete_type
         self.discrete_size = env.unity_discrete_size
@@ -49,9 +42,9 @@ class PPOAgent(nn.Module):
         self.hidden_networks = nn.ModuleList(
             [
                 nn.Sequential(
-                    layer_init(nn.Linear(self.state_size, 256)),
+                    layer_init(nn.Linear(self.state_size, neural_size_1)),
                     nn.LeakyReLU(),
-                    layer_init(nn.Linear(256, 128)),
+                    layer_init(nn.Linear(neural_size_1, neural_size_2)),
                     nn.LeakyReLU(),
                 )
                 for i in range(self.target_num)
@@ -59,16 +52,16 @@ class PPOAgent(nn.Module):
         )
 
         self.actor_dis = nn.ModuleList(
-            [layer_init(nn.Linear(128, self.discrete_size), std=0.5) for i in range(self.target_num)]
+            [layer_init(nn.Linear(neural_size_2, self.discrete_size), std=0.5) for i in range(self.target_num)]
         )
         self.actor_mean = nn.ModuleList(
-            [layer_init(nn.Linear(128, self.continuous_size), std=0) for i in range(self.target_num)]
+            [layer_init(nn.Linear(neural_size_2, self.continuous_size), std=0) for i in range(self.target_num)]
         )
         self.actor_logstd = nn.ParameterList(
             [nn.Parameter(torch.zeros(1, self.continuous_size)) for i in range(self.target_num)]
         )
         self.critic = nn.ModuleList(
-            [layer_init(nn.Linear(128, 1), std=0) for i in range(self.target_num)]
+            [layer_init(nn.Linear(neural_size_2, 1), std=0) for i in range(self.target_num)]
         )
 
     def get_value(self, state: torch.Tensor):
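Putting the resized pieces together, a sketch of a single per-target head as this commit leaves it: a 400-unit to 300-unit trunk feeding separate discrete, continuous-mean, and value layers. The class below is illustrative (the real agent keeps these layers in per-target ModuleLists), and the orthogonal weight init inside layer_init is the usual companion to the bias init shown in the diff, assumed here.

import numpy as np
import torch
import torch.nn as nn

def layer_init(layer, std=np.sqrt(2), bias_const=0.0):
    nn.init.orthogonal_(layer.weight, std)   # assumed; only the bias init and return are visible in the diff
    nn.init.constant_(layer.bias, bias_const)
    return layer

neural_size_1 = 400
neural_size_2 = 300

class SingleTargetHead(nn.Module):
    """Illustrative stand-in for one entry of the per-target ModuleLists."""

    def __init__(self, state_size: int, discrete_size: int, continuous_size: int):
        super().__init__()
        self.hidden = nn.Sequential(
            layer_init(nn.Linear(state_size, neural_size_1)),
            nn.LeakyReLU(),
            layer_init(nn.Linear(neural_size_1, neural_size_2)),
            nn.LeakyReLU(),
        )
        self.actor_dis = layer_init(nn.Linear(neural_size_2, discrete_size), std=0.5)
        self.actor_mean = layer_init(nn.Linear(neural_size_2, continuous_size), std=0)
        self.critic = layer_init(nn.Linear(neural_size_2, 1), std=0)

    def forward(self, state: torch.Tensor):
        hidden = self.hidden(state)
        return self.actor_dis(hidden), self.actor_mean(hidden), self.critic(hidden)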