Aimbot-PPO/Aimbot-PPO-Python/main.py

25 lines
644 B
Python
Raw Normal View History

import aimBotEnv
import PPO
# ---------------------------------------------------------------------------
# Run / environment settings
# ---------------------------------------------------------------------------
# Path to the built environment, handed to aimBotEnv.makeEnv as envPath.
ENV_PATH = './Build/Aimbot-PPO'
# Worker id handed to aimBotEnv.makeEnv as workerID.
WORKER_ID = 100
# Presumably the maximum number of episodes and the number of steps per
# episode -- TODO confirm against the training loop.
MAX_EP = 1000
EP_LENGTH = 400
# Take an action every ACTION_INTERVAL steps.
ACTION_INTERVAL = 1
# Presumably switches between training and evaluation -- TODO confirm.
TRAIN = True

# ---------------------------------------------------------------------------
# PPO hyperparameters
# ---------------------------------------------------------------------------
# Discount applied to future reward (original note asks whether to raise it).
GAMMA = 0.99
# Clip ratio; the ratio is kept within [1 - EPSILON, 1 + EPSILON].
EPSILON = 0.2
# Learning rates for the actor and the critic.
ACTOR_LR = 1e-5
CRITIC_LR = 2e-5
# Learning step (original wording); presumably the batch size -- TODO confirm.
BATCH = 32
# Epoch counts for the actor and the critic.
ACTOR_EPOCH = 10
CRITIC_EPOCH = 10
# Weight of sigma's entropy in the actor loss.
# NOTE: the name is misspelled ("WHEIGHT") but is kept unchanged because code
# outside this section may refer to it.
ENTROPY_WHEIGHT = 0.01

# ---------------------------------------------------------------------------
# Environment creation and the sizes it reports
# ---------------------------------------------------------------------------
env = aimBotEnv.makeEnv(envPath=ENV_PATH, workerID=WORKER_ID)
STATE_SIZE = env.STATE_SIZE
CONTINUOUS_SIZE = env.CONTINUOUS_SIZE
DISCRETE_SIZE = env.DISCRETE_SIZE
# Presumably the magnitude bound for continuous actions -- TODO confirm.
CTN_ACTION_RANGE = 2