Corresponds to the V3.1.6 Play mode
Modifies the Stay Target handling from V3.1.5
parent f9ee51c256 · commit 15c1edb6c9
@@ -68,6 +68,9 @@ class Aimbot(gym.Env):
         # agents number
         self.unity_agent_num = len(self.unity_agent_IDS)
 
+        # all zero action
+        self.all_zero_action = np.zeros((self.unity_agent_num, self.unity_action_size))
+
     def reset(self) -> Tuple[np.ndarray, List, List]:
         """reset environment and get observations
 
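The new all_zero_action is simply an (agent count × action size) zero matrix, so stepping the environment with it sends every agent a "do nothing" command for one frame. A minimal standalone sketch of the same idea, using 8 agents and a 4-component action vector as purely hypothetical values:

    import numpy as np

    unity_agent_num = 8     # hypothetical: number of agents reported by Unity
    unity_action_size = 4   # hypothetical: length of one agent's action vector
    all_zero_action = np.zeros((unity_agent_num, unity_action_size))
    print(all_zero_action.shape)  # (8, 4); env.step(all_zero_action) would advance Unity with no input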
@@ -181,31 +181,30 @@
  },
  {
   "cell_type": "code",
-  "execution_count": 1,
+  "execution_count": 6,
   "metadata": {},
   "outputs": [
-   {
-    "name": "stderr",
-    "output_type": "stream",
-    "text": [
-     "Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.\n",
-     "\u001b[34m\u001b[1mwandb\u001b[0m: Currently logged in as: \u001b[33mkoha9\u001b[0m. Use \u001b[1m`wandb login --relogin`\u001b[0m to force relogin\n"
-    ]
-   },
    {
     "data": {
      "text/plain": [
-      "True"
+      "array([[0., 0., 0., 0.],\n",
+      "       [0., 0., 0., 0.],\n",
+      "       [0., 0., 0., 0.],\n",
+      "       [0., 0., 0., 0.],\n",
+      "       [0., 0., 0., 0.],\n",
+      "       [0., 0., 0., 0.],\n",
+      "       [0., 0., 0., 0.],\n",
+      "       [0., 0., 0., 0.]])"
      ]
     },
-    "execution_count": 1,
+    "execution_count": 6,
     "metadata": {},
     "output_type": "execute_result"
   }
  ],
  "source": [
-  "import wandb\n",
-  "wandb.login()"
+  "import numpy as np\n",
+  "np.zeros((8, 4))"
  ]
 }
],
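The reworked notebook cell reads like an interactive sanity check of that matrix: np.zeros((8, 4)) reproduces the 8 × 4 zero array shown in the execute_result, which would correspond to an all-zero action for eight agents with four action components (reading 8 and 4 as the concrete unity_agent_num and unity_action_size of this build is an assumption).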
@@ -34,10 +34,11 @@ if __name__ == "__main__":
 
     # Initialize environment agent optimizer
     aimbot_side_channel = AimbotSideChannel(SIDE_CHANNEL_UUID)
-    env = Aimbot(env_path=args.path,
-                 worker_id=args.workerID,
-                 base_port=args.baseport,
-                 side_channels=[aimbot_side_channel])
+    env = Aimbot(
+        env_path=args.path,
+        worker_id=args.workerID,
+        base_port=args.baseport,
+        side_channels=[aimbot_side_channel])
     if args.load_dir is None:
         agent = PPOAgent(
             env=env,
@@ -104,7 +105,12 @@ if __name__ == "__main__":
     last_reward = [0. for i in range(env.unity_agent_num)]
     # MAIN LOOP: run agent in environment
     while True:
-        # On decision point, choose action by agent
+        # Target Type (state[0][0]) is stay (4): use the all-zero action
+        if state[0][0] == 4:
+            next_state, reward, next_done = env.step(env.all_zero_action)
+            state, done = next_state, next_done
+            continue
+        # On decision point, and Target Type (state[0][0]) is not stay (4), choose action by agent
         if step % args.decision_period == 0:
             step += 1
             # Choose action by agent
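The new branch short-circuits the whole decision path whenever the current target asks the agents to stay: the environment is stepped with the all-zero action and the loop restarts without querying the PPO agent or advancing the decision counter. A rough, hypothetical restatement of the resulting control flow (the stay code 4 and the state[0][0] slot are taken from the diff; the function wrapper and names are illustrative only):

    def play_loop(env, agent, args, state, done, step=0):
        """Sketch of the play-mode main loop; bodies marked ... are unchanged from the original script."""
        while True:
            if state[0][0] == 4:  # stay target: hold position, skip agent inference entirely
                next_state, reward, next_done = env.step(env.all_zero_action)
                state, done = next_state, next_done
                continue
            if step % args.decision_period == 0:
                step += 1
                ...  # query the PPO agent, step the env, optionally save memories (see next hunk)
            else:
                step += 1
                ...  # off-decision frame: repeat the last predicted action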
@@ -124,52 +130,54 @@ if __name__ == "__main__":
             next_state, reward, next_done = env.step(action_cpu)
 
             # save memories
-            ppo_memories.save_memories(
-                now_step=step,
-                agent=agent,
-                state=state,
-                action_cpu=action_cpu,
-                dis_logprob_cpu=dis_logprob_cpu,
-                con_logprob_cpu=con_logprob_cpu,
-                reward=reward,
-                done=done,
-                value_cpu=value_cpu,
-                last_reward=last_reward,
-                next_done=next_done,
-                next_state=next_state,
-            )
+            if args.train:
+                ppo_memories.save_memories(
+                    now_step=step,
+                    agent=agent,
+                    state=state,
+                    action_cpu=action_cpu,
+                    dis_logprob_cpu=dis_logprob_cpu,
+                    con_logprob_cpu=con_logprob_cpu,
+                    reward=reward,
+                    done=done,
+                    value_cpu=value_cpu,
+                    last_reward=last_reward,
+                    next_done=next_done,
+                    next_state=next_state,
+                )
             # check if any training dataset is full and ready to train
             for i in range(args.target_num):
                 if ppo_memories.obs[i].size()[0] >= args.datasetSize:
                     # start train NN
                     train_queue.append(i)
             if len(train_queue) > 0:
                 # break while loop and start train
                 break
             # update state
             state, done = next_state, next_done
         else:
             step += 1
             # skip this step use last predict action
             next_state, reward, next_done = env.step(action_cpu)
             # save memories
-            ppo_memories.save_memories(
-                now_step=step,
-                agent=agent,
-                state=state,
-                action_cpu=action_cpu,
-                dis_logprob_cpu=dis_logprob_cpu,
-                con_logprob_cpu=con_logprob_cpu,
-                reward=reward,
-                done=done,
-                value_cpu=value_cpu,
-                last_reward=last_reward,
-                next_done=next_done,
-                next_state=next_state,
-            )
+            if args.train:
+                ppo_memories.save_memories(
+                    now_step=step,
+                    agent=agent,
+                    state=state,
+                    action_cpu=action_cpu,
+                    dis_logprob_cpu=dis_logprob_cpu,
+                    con_logprob_cpu=con_logprob_cpu,
+                    reward=reward,
+                    done=done,
+                    value_cpu=value_cpu,
+                    last_reward=last_reward,
+                    next_done=next_done,
+                    next_state=next_state,
+                )
             # update state
             state = next_state
             last_reward = reward
 
     if args.train:
         # train mode on
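With the new args.train guard in both branches, a play run never populates ppo_memories, so the dataset-size check never fires and the loop is never broken out of to start a training pass; when args.train is set, the save/step sequence is unchanged apart from the extra indent level.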
@@ -34,9 +34,9 @@ BROADCASTREWARD = False
 ANNEAL_LEARNING_RATE = True
 CLIP_VLOSS = True
 NORM_ADV = False
-TRAIN = True
-SAVE_MODEL = True
-WANDB_TACK = True
+TRAIN = False
+SAVE_MODEL = False
+WANDB_TACK = False
 LOAD_DIR = None
 #LOAD_DIR = "../PPO-Model/PList_Go_LeakyReLU_9331_1677965178_bestGoto/PList_Go_LeakyReLU_9331_1677965178_10.709002.pt"
 
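Flipping TRAIN, SAVE_MODEL and WANDB_TACK from True to False lines up with the play-mode intent of the commit: presumably these defaults leave the agent running pure inference, with no memory collection or parameter updates (the args.train guard above), no checkpoints written, and no Weights & Biases tracking.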